001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 package org.apache.commons.io.input; 018 019 import java.io.IOException; 020 import java.io.InputStream; 021 import java.io.Reader; 022 import java.nio.ByteBuffer; 023 import java.nio.CharBuffer; 024 import java.nio.charset.Charset; 025 import java.nio.charset.CharsetEncoder; 026 import java.nio.charset.CoderResult; 027 import java.nio.charset.CodingErrorAction; 028 029 /** 030 * {@link InputStream} implementation that reads a character stream from a {@link Reader} 031 * and transforms it to a byte stream using a specified charset encoding. The stream 032 * is transformed using a {@link CharsetEncoder} object, guaranteeing that all charset 033 * encodings supported by the JRE are handled correctly. In particular for charsets such as 034 * UTF-16, the implementation ensures that one and only one byte order marker 035 * is produced. 036 * <p> 037 * Since in general it is not possible to predict the number of characters to be read from the 038 * {@link Reader} to satisfy a read request on the {@link ReaderInputStream}, all reads from 039 * the {@link Reader} are buffered. There is therefore no well defined correlation 040 * between the current position of the {@link Reader} and that of the {@link ReaderInputStream}. 041 * This also implies that in general there is no need to wrap the underlying {@link Reader} 042 * in a {@link java.io.BufferedReader}. 043 * <p> 044 * {@link ReaderInputStream} implements the inverse transformation of {@link java.io.InputStreamReader}; 045 * in the following example, reading from <tt>in2</tt> would return the same byte 046 * sequence as reading from <tt>in</tt> (provided that the initial byte sequence is legal 047 * with respect to the charset encoding): 048 * <pre> 049 * InputStream in = ... 050 * Charset cs = ... 051 * InputStreamReader reader = new InputStreamReader(in, cs); 052 * ReaderInputStream in2 = new ReaderInputStream(reader, cs);</pre> 053 * {@link ReaderInputStream} implements the same transformation as {@link java.io.OutputStreamWriter}, 054 * except that the control flow is reversed: both classes transform a character stream 055 * into a byte stream, but {@link java.io.OutputStreamWriter} pushes data to the underlying stream, 056 * while {@link ReaderInputStream} pulls it from the underlying stream. 057 * <p> 058 * Note that while there are use cases where there is no alternative to using 059 * this class, very often the need to use this class is an indication of a flaw 060 * in the design of the code. This class is typically used in situations where an existing 061 * API only accepts an {@link InputStream}, but where the most natural way to produce the data 062 * is as a character stream, i.e. by providing a {@link Reader} instance. An example of a situation 063 * where this problem may appear is when implementing the {@link javax.activation.DataSource} 064 * interface from the Java Activation Framework. 065 * <p> 066 * Given the fact that the {@link Reader} class doesn't provide any way to predict whether the next 067 * read operation will block or not, it is not possible to provide a meaningful 068 * implementation of the {@link InputStream#available()} method. A call to this method 069 * will always return 0. Also, this class doesn't support {@link InputStream#mark(int)}. 070 * <p> 071 * Instances of {@link ReaderInputStream} are not thread safe. 072 * 073 * @see org.apache.commons.io.output.WriterOutputStream 074 * 075 * @since 2.0 076 */ 077 public class ReaderInputStream extends InputStream { 078 private static final int DEFAULT_BUFFER_SIZE = 1024; 079 080 private final Reader reader; 081 private final CharsetEncoder encoder; 082 083 /** 084 * CharBuffer used as input for the decoder. It should be reasonably 085 * large as we read data from the underlying Reader into this buffer. 086 */ 087 private final CharBuffer encoderIn; 088 089 /** 090 * ByteBuffer used as output for the decoder. This buffer can be small 091 * as it is only used to transfer data from the decoder to the 092 * buffer provided by the caller. 093 */ 094 private final ByteBuffer encoderOut; 095 096 private CoderResult lastCoderResult; 097 private boolean endOfInput; 098 099 /** 100 * Construct a new {@link ReaderInputStream}. 101 * 102 * @param reader the target {@link Reader} 103 * @param encoder the charset encoder 104 * @since 2.1 105 */ 106 public ReaderInputStream(Reader reader, CharsetEncoder encoder) { 107 this(reader, encoder, DEFAULT_BUFFER_SIZE); 108 } 109 110 /** 111 * Construct a new {@link ReaderInputStream}. 112 * 113 * @param reader the target {@link Reader} 114 * @param encoder the charset encoder 115 * @param bufferSize the size of the input buffer in number of characters 116 * @since 2.1 117 */ 118 public ReaderInputStream(Reader reader, CharsetEncoder encoder, int bufferSize) { 119 this.reader = reader; 120 this.encoder = encoder; 121 this.encoderIn = CharBuffer.allocate(bufferSize); 122 this.encoderIn.flip(); 123 this.encoderOut = ByteBuffer.allocate(128); 124 this.encoderOut.flip(); 125 } 126 127 /** 128 * Construct a new {@link ReaderInputStream}. 129 * 130 * @param reader the target {@link Reader} 131 * @param charset the charset encoding 132 * @param bufferSize the size of the input buffer in number of characters 133 */ 134 public ReaderInputStream(Reader reader, Charset charset, int bufferSize) { 135 this(reader, 136 charset.newEncoder() 137 .onMalformedInput(CodingErrorAction.REPLACE) 138 .onUnmappableCharacter(CodingErrorAction.REPLACE), 139 bufferSize); 140 } 141 142 /** 143 * Construct a new {@link ReaderInputStream} with a default input buffer size of 144 * 1024 characters. 145 * 146 * @param reader the target {@link Reader} 147 * @param charset the charset encoding 148 */ 149 public ReaderInputStream(Reader reader, Charset charset) { 150 this(reader, charset, DEFAULT_BUFFER_SIZE); 151 } 152 153 /** 154 * Construct a new {@link ReaderInputStream}. 155 * 156 * @param reader the target {@link Reader} 157 * @param charsetName the name of the charset encoding 158 * @param bufferSize the size of the input buffer in number of characters 159 */ 160 public ReaderInputStream(Reader reader, String charsetName, int bufferSize) { 161 this(reader, Charset.forName(charsetName), bufferSize); 162 } 163 164 /** 165 * Construct a new {@link ReaderInputStream} with a default input buffer size of 166 * 1024 characters. 167 * 168 * @param reader the target {@link Reader} 169 * @param charsetName the name of the charset encoding 170 */ 171 public ReaderInputStream(Reader reader, String charsetName) { 172 this(reader, charsetName, DEFAULT_BUFFER_SIZE); 173 } 174 175 /** 176 * Construct a new {@link ReaderInputStream} that uses the default character encoding 177 * with a default input buffer size of 1024 characters. 178 * 179 * @param reader the target {@link Reader} 180 */ 181 public ReaderInputStream(Reader reader) { 182 this(reader, Charset.defaultCharset()); 183 } 184 185 /** 186 * Fills the internal char buffer from the reader. 187 * 188 * @throws IOException 189 * If an I/O error occurs 190 */ 191 private void fillBuffer() throws IOException { 192 if (!endOfInput && (lastCoderResult == null || lastCoderResult.isUnderflow())) { 193 encoderIn.compact(); 194 int position = encoderIn.position(); 195 // We don't use Reader#read(CharBuffer) here because it is more efficient 196 // to write directly to the underlying char array (the default implementation 197 // copies data to a temporary char array). 198 int c = reader.read(encoderIn.array(), position, encoderIn.remaining()); 199 if (c == -1) { 200 endOfInput = true; 201 } else { 202 encoderIn.position(position+c); 203 } 204 encoderIn.flip(); 205 } 206 encoderOut.compact(); 207 lastCoderResult = encoder.encode(encoderIn, encoderOut, endOfInput); 208 encoderOut.flip(); 209 } 210 211 /** 212 * Read the specified number of bytes into an array. 213 * 214 * @param b the byte array to read into 215 * @param off the offset to start reading bytes into 216 * @param len the number of bytes to read 217 * @return the number of bytes read or <code>-1</code> 218 * if the end of the stream has been reached 219 * @throws IOException if an I/O error occurs 220 */ 221 @Override 222 public int read(byte[] b, int off, int len) throws IOException { 223 if (b == null) { 224 throw new NullPointerException("Byte array must not be null"); 225 } 226 if (len < 0 || off < 0 || (off + len) > b.length) { 227 throw new IndexOutOfBoundsException("Array Size=" + b.length + 228 ", offset=" + off + ", length=" + len); 229 } 230 int read = 0; 231 if (len == 0) { 232 return 0; // Always return 0 if len == 0 233 } 234 while (len > 0) { 235 if (encoderOut.hasRemaining()) { 236 int c = Math.min(encoderOut.remaining(), len); 237 encoderOut.get(b, off, c); 238 off += c; 239 len -= c; 240 read += c; 241 } else { 242 fillBuffer(); 243 if (endOfInput && !encoderOut.hasRemaining()) { 244 break; 245 } 246 } 247 } 248 return read == 0 && endOfInput ? -1 : read; 249 } 250 251 /** 252 * Read the specified number of bytes into an array. 253 * 254 * @param b the byte array to read into 255 * @return the number of bytes read or <code>-1</code> 256 * if the end of the stream has been reached 257 * @throws IOException if an I/O error occurs 258 */ 259 @Override 260 public int read(byte[] b) throws IOException { 261 return read(b, 0, b.length); 262 } 263 264 /** 265 * Read a single byte. 266 * 267 * @return either the byte read or <code>-1</code> if the end of the stream 268 * has been reached 269 * @throws IOException if an I/O error occurs 270 */ 271 @Override 272 public int read() throws IOException { 273 for (;;) { 274 if (encoderOut.hasRemaining()) { 275 return encoderOut.get() & 0xFF; 276 } else { 277 fillBuffer(); 278 if (endOfInput && !encoderOut.hasRemaining()) { 279 return -1; 280 } 281 } 282 } 283 } 284 285 /** 286 * Close the stream. This method will cause the underlying {@link Reader} 287 * to be closed. 288 * @throws IOException if an I/O error occurs 289 */ 290 @Override 291 public void close() throws IOException { 292 reader.close(); 293 } 294 }