001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018package org.apache.commons.io.input; 019 020import static org.apache.commons.io.IOUtils.EOF; 021 022import java.io.IOException; 023import java.io.InputStream; 024import java.nio.ByteBuffer; 025import java.nio.CharBuffer; 026import java.nio.charset.CharacterCodingException; 027import java.nio.charset.Charset; 028import java.nio.charset.CharsetEncoder; 029import java.nio.charset.CoderResult; 030import java.nio.charset.CodingErrorAction; 031 032/** 033 * {@link InputStream} implementation that can read from String, StringBuffer, 034 * StringBuilder or CharBuffer. 035 * <p> 036 * <strong>Note:</strong> Supports {@link #mark(int)} and {@link #reset()}. 037 * 038 * @since 2.2 039 */ 040public class CharSequenceInputStream extends InputStream { 041 042 private static final int BUFFER_SIZE = 2048; 043 044 private static final int NO_MARK = -1; 045 046 private final CharsetEncoder encoder; 047 private final CharBuffer cbuf; 048 private final ByteBuffer bbuf; 049 050 private int mark_cbuf; // position in cbuf 051 private int mark_bbuf; // position in bbuf 052 053 /** 054 * Constructor. 055 * 056 * @param cs the input character sequence 057 * @param charset the character set name to use 058 * @param bufferSize the buffer size to use. 059 * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character 060 */ 061 public CharSequenceInputStream(final CharSequence cs, final Charset charset, final int bufferSize) { 062 super(); 063 this.encoder = charset.newEncoder() 064 .onMalformedInput(CodingErrorAction.REPLACE) 065 .onUnmappableCharacter(CodingErrorAction.REPLACE); 066 // Ensure that buffer is long enough to hold a complete character 067 final float maxBytesPerChar = encoder.maxBytesPerChar(); 068 if (bufferSize < maxBytesPerChar) { 069 throw new IllegalArgumentException("Buffer size " + bufferSize + " is less than maxBytesPerChar " + 070 maxBytesPerChar); 071 } 072 this.bbuf = ByteBuffer.allocate(bufferSize); 073 this.bbuf.flip(); 074 this.cbuf = CharBuffer.wrap(cs); 075 this.mark_cbuf = NO_MARK; 076 this.mark_bbuf = NO_MARK; 077 } 078 079 /** 080 * Constructor, calls {@link #CharSequenceInputStream(CharSequence, Charset, int)}. 081 * 082 * @param cs the input character sequence 083 * @param charset the character set name to use 084 * @param bufferSize the buffer size to use. 085 * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character 086 */ 087 public CharSequenceInputStream(final CharSequence cs, final String charset, final int bufferSize) { 088 this(cs, Charset.forName(charset), bufferSize); 089 } 090 091 /** 092 * Constructor, calls {@link #CharSequenceInputStream(CharSequence, Charset, int)} 093 * with a buffer size of 2048. 094 * 095 * @param cs the input character sequence 096 * @param charset the character set name to use 097 * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character 098 */ 099 public CharSequenceInputStream(final CharSequence cs, final Charset charset) { 100 this(cs, charset, BUFFER_SIZE); 101 } 102 103 /** 104 * Constructor, calls {@link #CharSequenceInputStream(CharSequence, String, int)} 105 * with a buffer size of 2048. 106 * 107 * @param cs the input character sequence 108 * @param charset the character set name to use 109 * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character 110 */ 111 public CharSequenceInputStream(final CharSequence cs, final String charset) { 112 this(cs, charset, BUFFER_SIZE); 113 } 114 115 /** 116 * Fills the byte output buffer from the input char buffer. 117 * 118 * @throws CharacterCodingException 119 * an error encoding data 120 */ 121 private void fillBuffer() throws CharacterCodingException { 122 this.bbuf.compact(); 123 final CoderResult result = this.encoder.encode(this.cbuf, this.bbuf, true); 124 if (result.isError()) { 125 result.throwException(); 126 } 127 this.bbuf.flip(); 128 } 129 130 @Override 131 public int read(final byte[] b, int off, int len) throws IOException { 132 if (b == null) { 133 throw new NullPointerException("Byte array is null"); 134 } 135 if (len < 0 || (off + len) > b.length) { 136 throw new IndexOutOfBoundsException("Array Size=" + b.length + 137 ", offset=" + off + ", length=" + len); 138 } 139 if (len == 0) { 140 return 0; // must return 0 for zero length read 141 } 142 if (!this.bbuf.hasRemaining() && !this.cbuf.hasRemaining()) { 143 return EOF; 144 } 145 int bytesRead = 0; 146 while (len > 0) { 147 if (this.bbuf.hasRemaining()) { 148 final int chunk = Math.min(this.bbuf.remaining(), len); 149 this.bbuf.get(b, off, chunk); 150 off += chunk; 151 len -= chunk; 152 bytesRead += chunk; 153 } else { 154 fillBuffer(); 155 if (!this.bbuf.hasRemaining() && !this.cbuf.hasRemaining()) { 156 break; 157 } 158 } 159 } 160 return bytesRead == 0 && !this.cbuf.hasRemaining() ? EOF : bytesRead; 161 } 162 163 @Override 164 public int read() throws IOException { 165 for (;;) { 166 if (this.bbuf.hasRemaining()) { 167 return this.bbuf.get() & 0xFF; 168 } 169 fillBuffer(); 170 if (!this.bbuf.hasRemaining() && !this.cbuf.hasRemaining()) { 171 return EOF; 172 } 173 } 174 } 175 176 @Override 177 public int read(final byte[] b) throws IOException { 178 return read(b, 0, b.length); 179 } 180 181 @Override 182 public long skip(long n) throws IOException { 183 /* 184 * This could be made more efficient by using position to skip within the current buffer. 185 */ 186 long skipped = 0; 187 while (n > 0 && available() > 0) { 188 this.read(); 189 n--; 190 skipped++; 191 } 192 return skipped; 193 } 194 195 /** 196 * Return an estimate of the number of bytes remaining in the byte stream. 197 * @return the count of bytes that can be read without blocking (or returning EOF). 198 * 199 * @throws IOException if an error occurs (probably not possible) 200 */ 201 @Override 202 public int available() throws IOException { 203 // The cached entries are in bbuf; since encoding always creates at least one byte 204 // per character, we can add the two to get a better estimate (e.g. if bbuf is empty) 205 // Note that the previous implementation (2.4) could return zero even though there were 206 // encoded bytes still available. 207 return this.bbuf.remaining() + this.cbuf.remaining(); 208 } 209 210 @Override 211 public void close() throws IOException { 212 } 213 214 /** 215 * {@inheritDoc} 216 * @param readlimit max read limit (ignored) 217 */ 218 @Override 219 public synchronized void mark(final int readlimit) { 220 this.mark_cbuf = this.cbuf.position(); 221 this.mark_bbuf = this.bbuf.position(); 222 this.cbuf.mark(); 223 this.bbuf.mark(); 224 // It would be nice to be able to use mark & reset on the cbuf and bbuf; 225 // however the bbuf is re-used so that won't work 226 } 227 228 @Override 229 public synchronized void reset() throws IOException { 230 /* 231 * This is not the most efficient implementation, as it re-encodes from the beginning. 232 * 233 * Since the bbuf is re-used, in general it's necessary to re-encode the data. 234 * 235 * It should be possible to apply some optimisations however: 236 * + use mark/reset on the cbuf and bbuf. This would only work if the buffer had not been (re)filled since 237 * the mark. The code would have to catch InvalidMarkException - does not seem possible to check if mark is 238 * valid otherwise. + Try saving the state of the cbuf before each fillBuffer; it might be possible to 239 * restart from there. 240 */ 241 if (this.mark_cbuf != NO_MARK) { 242 // if cbuf is at 0, we have not started reading anything, so skip re-encoding 243 if (this.cbuf.position() != 0) { 244 this.encoder.reset(); 245 this.cbuf.rewind(); 246 this.bbuf.rewind(); 247 this.bbuf.limit(0); // rewind does not clear the buffer 248 while(this.cbuf.position() < this.mark_cbuf) { 249 this.bbuf.rewind(); // empty the buffer (we only refill when empty during normal processing) 250 this.bbuf.limit(0); 251 fillBuffer(); 252 } 253 } 254 if (this.cbuf.position() != this.mark_cbuf) { 255 throw new IllegalStateException("Unexpected CharBuffer postion: actual=" + cbuf.position() + " " + 256 "expected=" + this.mark_cbuf); 257 } 258 this.bbuf.position(this.mark_bbuf); 259 this.mark_cbuf = NO_MARK; 260 this.mark_bbuf = NO_MARK; 261 } 262 } 263 264 @Override 265 public boolean markSupported() { 266 return true; 267 } 268 269}