/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.io.input;

import static org.apache.commons.io.IOUtils.EOF;

import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;
import java.util.Objects;

/**
 * {@link InputStream} implementation that can read from String, StringBuffer,
 * StringBuilder or CharBuffer.
 * <p>
 * The characters are encoded lazily, one buffer-full at a time, with a
 * {@link CharsetEncoder} that replaces malformed input and unmappable
 * characters instead of failing.
 * </p>
 * <p>
 * <strong>Note:</strong> Supports {@link #mark(int)} and {@link #reset()}.
 * A mark stays valid until {@link #mark(int)} is called again, so
 * {@link #reset()} may be called repeatedly for the same mark.
 * </p>
 *
 * @since 2.2
 */
public class CharSequenceInputStream extends InputStream {

    /** Default size in bytes of the encoded-byte buffer. */
    private static final int BUFFER_SIZE = 2048;

    /** Sentinel stored in the mark fields while no mark has been set. */
    private static final int NO_MARK = -1;

    private final CharsetEncoder encoder;

    /** The characters still to be encoded. */
    private final CharBuffer cbuf;

    /** Encoded bytes not yet handed to the caller; kept in "read" (flipped) mode between calls. */
    private final ByteBuffer bbuf;

    /** Position in {@link #cbuf} when the mark was set, or {@link #NO_MARK}. */
    private int markCbuf;

    /** Position in {@link #bbuf} when the mark was set, or {@link #NO_MARK}. */
    private int markBbuf;

    /**
     * Constructs a stream that encodes the given character sequence with the given charset.
     *
     * @param cs the input character sequence
     * @param charset the character set to use
     * @param bufferSize the size in bytes of the internal byte buffer
     * @throws IllegalArgumentException if {@code bufferSize} is less than the encoder's
     *         {@link CharsetEncoder#maxBytesPerChar() maxBytesPerChar}
     */
    public CharSequenceInputStream(final CharSequence cs, final Charset charset, final int bufferSize) {
        super();
        this.encoder = charset.newEncoder()
            .onMalformedInput(CodingErrorAction.REPLACE)
            .onUnmappableCharacter(CodingErrorAction.REPLACE);
        // Ensure that buffer is long enough to hold a complete character
        final float maxBytesPerChar = encoder.maxBytesPerChar();
        if (bufferSize < maxBytesPerChar) {
            throw new IllegalArgumentException("Buffer size " + bufferSize + " is less than maxBytesPerChar " +
                    maxBytesPerChar);
        }
        this.bbuf = ByteBuffer.allocate(bufferSize);
        // Start in read mode with nothing available: position == limit == 0.
        this.bbuf.flip();
        this.cbuf = CharBuffer.wrap(cs);
        this.markCbuf = NO_MARK;
        this.markBbuf = NO_MARK;
    }

    /**
     * Constructor, calls {@link #CharSequenceInputStream(CharSequence, Charset, int)}.
     *
     * @param cs the input character sequence
     * @param charset the name of the character set to use, resolved with {@link Charset#forName(String)}
     * @param bufferSize the size in bytes of the internal byte buffer
     * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character
     */
    public CharSequenceInputStream(final CharSequence cs, final String charset, final int bufferSize) {
        this(cs, Charset.forName(charset), bufferSize);
    }

    /**
     * Constructor, calls {@link #CharSequenceInputStream(CharSequence, Charset, int)}
     * with a buffer size of 2048.
     *
     * @param cs the input character sequence
     * @param charset the character set to use
     * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character
     */
    public CharSequenceInputStream(final CharSequence cs, final Charset charset) {
        this(cs, charset, BUFFER_SIZE);
    }

    /**
     * Constructor, calls {@link #CharSequenceInputStream(CharSequence, String, int)}
     * with a buffer size of 2048.
     *
     * @param cs the input character sequence
     * @param charset the name of the character set to use
     * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character
     */
    public CharSequenceInputStream(final CharSequence cs, final String charset) {
        this(cs, charset, BUFFER_SIZE);
    }

    /**
     * Fills the byte output buffer from the input char buffer.
     * <p>
     * Every caller in this class invokes this method only when the byte buffer is empty,
     * so the whole capacity is available to the encoder.
     * </p>
     *
     * @throws CharacterCodingException if the encoder reports an encoding error
     * @throws IOException if the byte buffer is too small to hold even the next encoded
     *         character, in which case no further progress would ever be possible
     */
    private void fillBuffer() throws IOException {
        this.bbuf.compact();
        final CoderResult result = this.encoder.encode(this.cbuf, this.bbuf, true);
        if (result.isError()) {
            result.throwException();
        }
        // An overflow that wrote nothing into an empty buffer can never succeed on retry
        // (e.g. a surrogate pair needing more bytes than maxBytesPerChar); fail instead of
        // letting the callers spin forever.
        if (result.isOverflow() && this.bbuf.position() == 0) {
            throw new IOException("Buffer size " + this.bbuf.capacity() +
                    " is too small to encode the character at index " + this.cbuf.position());
        }
        this.bbuf.flip();
    }

    /**
     * Reads up to {@code len} encoded bytes into {@code array}, starting at {@code off}.
     *
     * @param array the destination array
     * @param off the index in {@code array} of the first byte to write
     * @param len the maximum number of bytes to read
     * @return the number of bytes read, {@code 0} if {@code len} is zero, or
     *         {@link org.apache.commons.io.IOUtils#EOF EOF} if the end of the stream was reached
     * @throws NullPointerException if {@code array} is {@code null}
     * @throws IndexOutOfBoundsException if {@code off} or {@code len} is negative, or
     *         {@code len} is greater than {@code array.length - off}
     * @throws IOException if encoding fails
     */
    @Override
    public int read(final byte[] array, int off, int len) throws IOException {
        Objects.requireNonNull(array, "array");
        // Written as a subtraction so that a huge off + len cannot overflow past the check.
        if (off < 0 || len < 0 || len > array.length - off) {
            throw new IndexOutOfBoundsException("Array Size=" + array.length +
                    ", offset=" + off + ", length=" + len);
        }
        if (len == 0) {
            return 0; // must return 0 for zero length read
        }
        if (!this.bbuf.hasRemaining() && !this.cbuf.hasRemaining()) {
            return EOF;
        }
        int bytesRead = 0;
        while (len > 0) {
            if (this.bbuf.hasRemaining()) {
                final int chunk = Math.min(this.bbuf.remaining(), len);
                this.bbuf.get(array, off, chunk);
                off += chunk;
                len -= chunk;
                bytesRead += chunk;
            } else {
                fillBuffer();
                if (!this.bbuf.hasRemaining() && !this.cbuf.hasRemaining()) {
                    break; // nothing left to encode
                }
            }
        }
        return bytesRead == 0 && !this.cbuf.hasRemaining() ? EOF : bytesRead;
    }

    /**
     * Reads the next encoded byte.
     *
     * @return the byte as a value in the range 0 to 255, or
     *         {@link org.apache.commons.io.IOUtils#EOF EOF} if the end of the stream was reached
     * @throws IOException if encoding fails
     */
    @Override
    public int read() throws IOException {
        for (;;) {
            if (this.bbuf.hasRemaining()) {
                return this.bbuf.get() & 0xFF;
            }
            fillBuffer();
            if (!this.bbuf.hasRemaining() && !this.cbuf.hasRemaining()) {
                return EOF;
            }
        }
    }

    /**
     * Reads up to {@code b.length} encoded bytes into {@code b}.
     *
     * @param b the destination array
     * @return the number of bytes read, or
     *         {@link org.apache.commons.io.IOUtils#EOF EOF} if the end of the stream was reached
     * @throws IOException if encoding fails
     */
    @Override
    public int read(final byte[] b) throws IOException {
        return read(b, 0, b.length);
    }

    /**
     * Skips up to {@code n} bytes by reading and discarding them one at a time.
     *
     * @param n the number of bytes to skip
     * @return the number of bytes actually skipped
     * @throws IOException if encoding fails
     */
    @Override
    public long skip(long n) throws IOException {
        /*
         * This could be made more efficient by using position to skip within the current buffer.
         */
        long skipped = 0;
        while (n > 0 && available() > 0) {
            if (this.read() == EOF) {
                break; // do not count a byte that was never delivered
            }
            n--;
            skipped++;
        }
        return skipped;
    }

    /**
     * Return an estimate of the number of bytes remaining in the byte stream.
     * @return the count of bytes that can be read without blocking (or returning EOF).
     *
     * @throws IOException if an error occurs (probably not possible)
     */
    @Override
    public int available() throws IOException {
        // The cached entries are in bbuf; on the assumption that encoding creates at least one
        // byte per character, we can add the two to get a better estimate (e.g. if bbuf is empty)
        // Note that the previous implementation (2.4) could return zero even though there were
        // encoded bytes still available.
        return this.bbuf.remaining() + this.cbuf.remaining();
    }

    /**
     * Does nothing; this stream holds no resources that need releasing.
     *
     * @throws IOException never thrown by this implementation
     */
    @Override
    public void close() throws IOException {
        // noop
    }

    /**
     * {@inheritDoc}
     * @param readlimit max read limit (ignored)
     */
    @Override
    public synchronized void mark(final int readlimit) {
        // Only the positions are recorded. The buffers' own mark/reset cannot be used because
        // bbuf is re-used (compacted and refilled), which discards or invalidates its mark.
        this.markCbuf = this.cbuf.position();
        this.markBbuf = this.bbuf.position();
    }

    /**
     * Repositions this stream to where it was when {@link #mark(int)} was last called.
     * <p>
     * The mark is retained, so this method may be called again to return to the same
     * position. If {@link #mark(int)} has never been called, this method does nothing.
     * </p>
     *
     * @throws IOException if re-encoding the data up to the mark fails
     * @throws IllegalStateException if re-encoding does not arrive exactly at the marked position
     */
    @Override
    public synchronized void reset() throws IOException {
        /*
         * This is not the most efficient implementation, as it re-encodes from the beginning.
         *
         * Since the bbuf is re-used, in general it's necessary to re-encode the data.
         *
         * It should be possible to apply some optimisations however:
         * + use mark/reset on the cbuf and bbuf. This would only work if the buffer had not been (re)filled since
         * the mark. The code would have to catch InvalidMarkException - does not seem possible to check if mark is
         * valid otherwise.
         * + Try saving the state of the cbuf before each fillBuffer; it might be possible to
         * restart from there.
         */
        if (this.markCbuf != NO_MARK) {
            // if cbuf is at 0, we have not started reading anything, so skip re-encoding
            if (this.cbuf.position() != 0) {
                this.encoder.reset();
                this.cbuf.rewind();
                this.bbuf.rewind();
                this.bbuf.limit(0); // rewind does not clear the buffer
                while (this.cbuf.position() < this.markCbuf) {
                    this.bbuf.rewind(); // empty the buffer (we only refill when empty during normal processing)
                    this.bbuf.limit(0);
                    fillBuffer();
                }
            }
            if (this.cbuf.position() != this.markCbuf) {
                throw new IllegalStateException("Unexpected CharBuffer position: actual=" + cbuf.position() + " " +
                        "expected=" + this.markCbuf);
            }
            this.bbuf.position(this.markBbuf);
            // The mark fields are deliberately left untouched so that a later reset()
            // returns to the same position again.
        }
    }

    /**
     * Tests whether this stream supports {@link #mark(int)} and {@link #reset()}.
     *
     * @return always {@code true}
     */
    @Override
    public boolean markSupported() {
        return true;
    }

}