/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.io.input;

import static org.apache.commons.io.IOUtils.EOF;

import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;

import org.apache.commons.io.Charsets;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.build.AbstractStreamBuilder;
import org.apache.commons.io.charset.CharsetEncoders;
import org.apache.commons.io.function.Uncheck;

/**
 * Implements an {@link InputStream} to read bytes from String, StringBuffer, StringBuilder or CharBuffer,
 * encoded using the specified Charset. The Charset defaults to Charset.defaultCharset().
 * <p>
 * <strong>Note:</strong> Supports {@link #mark(int)} and {@link #reset()}.
 * </p>
 * <p>
 * The character sequence is encoded lazily, one byte buffer at a time; {@link #available()} therefore only reports the bytes that are
 * currently encoded and unread.
 * </p>
 * <p>
 * To build an instance, use {@link Builder}.
 * </p>
 *
 * @see Builder
 * @since 2.2
 */
public class CharSequenceInputStream extends InputStream {

    //@formatter:off
    /**
     * Builds a new {@link CharSequenceInputStream}.
     *
     * <p>
     * For example:
     * </p>
     * <h2>Using a Charset</h2>
     * <pre>{@code
     * CharSequenceInputStream s = CharSequenceInputStream.builder()
     *   .setBufferSize(8192)
     *   .setCharSequence("String")
     *   .setCharset(Charset.defaultCharset())
     *   .get();}
     * </pre>
     * <h2>Using a CharsetEncoder</h2>
     * <pre>{@code
     * CharSequenceInputStream s = CharSequenceInputStream.builder()
     *   .setBufferSize(8192)
     *   .setCharSequence("String")
     *   .setCharsetEncoder(Charset.defaultCharset().newEncoder()
     *     .onMalformedInput(CodingErrorAction.REPLACE)
     *     .onUnmappableCharacter(CodingErrorAction.REPLACE))
     *   .get();}
     * </pre>
     *
     * @see #get()
     * @since 2.13.0
     */
    //@formatter:on
    public static class Builder extends AbstractStreamBuilder<CharSequenceInputStream, Builder> {

        /** The encoder handed to the stream; replaced whenever the charset or the encoder is set. */
        private CharsetEncoder charsetEncoder = newEncoder(getCharset());

        /**
         * Constructs a new builder of {@link CharSequenceInputStream}.
         */
        public Builder() {
            // empty
        }

        /**
         * Builds a new {@link CharSequenceInputStream}.
         * <p>
         * You must set an aspect that supports {@link #getCharSequence()}, otherwise, this method throws an exception.
         * </p>
         * <p>
         * This builder uses the following aspects:
         * </p>
         * <ul>
         * <li>{@link #getCharSequence()} gets the target aspect.</li>
         * <li>{@link #getBufferSize()}</li>
         * <li>{@link CharsetEncoder}</li>
         * </ul>
         *
         * @return a new instance.
         * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
         * @see #getUnchecked()
         */
        @Override
        public CharSequenceInputStream get() {
            return Uncheck.get(() -> new CharSequenceInputStream(this));
        }

        /**
         * Gets the charset encoder currently configured on this builder.
         *
         * @return the charset encoder.
         */
        CharsetEncoder getCharsetEncoder() {
            return charsetEncoder;
        }

        /**
         * Sets the charset and replaces the encoder with a new one created for that charset, configured to replace malformed input and
         * unmappable characters.
         *
         * @param charset the charset.
         * @return {@code this} instance.
         */
        @Override
        public Builder setCharset(final Charset charset) {
            super.setCharset(charset);
            // Derive the encoder from getCharset() rather than the argument so both stay in agreement.
            charsetEncoder = newEncoder(getCharset());
            return this;
        }

        /**
         * Sets the charset encoder. Assumes that the caller has configured the encoder.
         * <p>
         * The builder's charset is updated to the charset of the resulting encoder.
         * </p>
         *
         * @param newEncoder the charset encoder.
         * @return {@code this} instance.
         * @since 2.13.0
         */
        public Builder setCharsetEncoder(final CharsetEncoder newEncoder) {
            charsetEncoder = CharsetEncoders.toCharsetEncoder(newEncoder, () -> newEncoder(getCharsetDefault()));
            super.setCharset(charsetEncoder.charset());
            return this;
        }

    }

    /** Value of {@link #cBufMark} and {@link #bBufMark} when no mark is recorded. */
    private static final int NO_MARK = -1;

    /**
     * Constructs a new {@link Builder}.
     *
     * @return a new {@link Builder}.
     * @since 2.12.0
     */
    public static Builder builder() {
        return new Builder();
    }

    /**
     * Creates an encoder for the given charset that replaces malformed input and unmappable characters.
     *
     * @param charset the charset, converted with {@link Charsets#toCharset(Charset)}.
     * @return a new encoder.
     */
    private static CharsetEncoder newEncoder(final Charset charset) {
        // @formatter:off
        return Charsets.toCharset(charset).newEncoder()
                .onMalformedInput(CodingErrorAction.REPLACE)
                .onUnmappableCharacter(CodingErrorAction.REPLACE);
        // @formatter:on
    }

    /** Encoded bytes waiting to be read; kept in read mode (flipped) between calls. */
    private final ByteBuffer bBuf;

    /** Position in {@link #bBuf} recorded by {@link #mark(int)}, or {@link #NO_MARK}. */
    private int bBufMark; // position in bBuf

    /** The characters still to be encoded. */
    private final CharBuffer cBuf;

    /** Position in {@link #cBuf} recorded by {@link #mark(int)}, or {@link #NO_MARK}. */
    private int cBufMark; // position in cBuf

    /** Encodes {@link #cBuf} into {@link #bBuf}. */
    private final CharsetEncoder charsetEncoder;

    /**
     * Constructs a new instance from a builder and encodes the first buffer of bytes.
     *
     * @param builder supplies the character sequence, the buffer size and the charset encoder.
     * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
     */
    private CharSequenceInputStream(final Builder builder) {
        this.charsetEncoder = builder.charsetEncoder;
        // Ensure that buffer is long enough to hold a complete character
        this.bBuf = ByteBuffer.allocate(ReaderInputStream.checkMinBufferSize(builder.charsetEncoder, builder.getBufferSize()));
        // Start in read mode with nothing to read.
        this.bBuf.flip();
        this.cBuf = CharBuffer.wrap(Uncheck.get(() -> builder.getCharSequence()));
        this.cBufMark = NO_MARK;
        this.bBufMark = NO_MARK;
        try {
            fillBuffer();
        } catch (final CharacterCodingException ex) {
            // Reset everything without filling the buffer
            // so the same exception can be thrown again later.
            this.bBuf.clear();
            this.bBuf.flip();
            this.cBuf.rewind();
        }
    }

    /**
     * Constructs a new instance with a buffer size of {@link IOUtils#DEFAULT_BUFFER_SIZE}.
     *
     * @param cs the input character sequence.
     * @param charset the character set to use, null maps to the default Charset.
     * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public CharSequenceInputStream(final CharSequence cs, final Charset charset) {
        this(cs, charset, IOUtils.DEFAULT_BUFFER_SIZE);
    }

    /**
     * Constructs a new instance.
     *
     * @param cs the input character sequence.
     * @param charset the character set to use, null maps to the default Charset.
     * @param bufferSize the buffer size to use.
     * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public CharSequenceInputStream(final CharSequence cs, final Charset charset, final int bufferSize) {
        this(builder().setCharSequence(cs).setCharset(charset).setBufferSize(bufferSize));
    }

    /**
     * Constructs a new instance with a buffer size of {@link IOUtils#DEFAULT_BUFFER_SIZE}.
     *
     * @param cs the input character sequence.
     * @param charset the character set name to use, null maps to the default Charset.
     * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public CharSequenceInputStream(final CharSequence cs, final String charset) {
        this(cs, charset, IOUtils.DEFAULT_BUFFER_SIZE);
    }

    /**
     * Constructs a new instance.
     *
     * @param cs the input character sequence.
     * @param charset the character set name to use, null maps to the default Charset.
     * @param bufferSize the buffer size to use.
     * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public CharSequenceInputStream(final CharSequence cs, final String charset, final int bufferSize) {
        this(cs, Charsets.toCharset(charset), bufferSize);
    }

    /**
     * Gets a lower bound on the number of bytes remaining in the byte stream.
     * <p>
     * Only the bytes that are already encoded and unread are counted; characters that have not been encoded yet are not.
     * </p>
     *
     * @return the count of bytes that can be read without blocking (or returning EOF).
     * @throws IOException if an error occurs (probably not possible).
     */
    @Override
    public int available() throws IOException {
        return this.bBuf.remaining();
    }

    /**
     * Closes this stream by discarding all unread data: the encoded bytes and the characters not yet encoded.
     * <p>
     * Afterwards {@link #available()} returns 0 and the read methods return EOF. Calling this method more than once has no further
     * effect.
     * </p>
     *
     * @throws IOException not thrown by this implementation.
     */
    @Override
    public void close() throws IOException {
        // Drop the characters that have not been encoded yet; draining only the byte buffer would let the
        // next read re-fill it from the remaining characters.
        this.cBuf.position(this.cBuf.limit());
        // Leave the byte buffer with nothing remaining. Its position is parked at the capacity, which is at least as
        // large as the limit of any encoded chunk, so that reset() can map a mark taken from now on to the end of the stream.
        this.bBuf.limit(this.bBuf.capacity());
        this.bBuf.position(this.bBuf.capacity());
    }

    /**
     * Fills the byte output buffer from the input char buffer.
     * <p>
     * Unread bytes are kept at the front of the buffer and newly encoded bytes are appended after them.
     * </p>
     *
     * @throws CharacterCodingException
     *             an error encoding data.
     */
    private void fillBuffer() throws CharacterCodingException {
        // Switch to write mode, preserving any unread bytes.
        this.bBuf.compact();
        final CoderResult result = this.charsetEncoder.encode(this.cBuf, this.bBuf, true);
        if (result.isError()) {
            // NOTE(review): the exception is raised before the flip below, which leaves bBuf in write mode; only the
            // constructor restores the buffers after catching it.
            result.throwException();
        }
        // Back to read mode.
        this.bBuf.flip();
    }

    /**
     * Gets the CharsetEncoder.
     *
     * @return the CharsetEncoder.
     */
    CharsetEncoder getCharsetEncoder() {
        return charsetEncoder;
    }

    /**
     * Records the current position of the character buffer and of the byte buffer so that {@link #reset()} can return to it.
     *
     * @param readLimit max read limit (ignored).
     */
    @Override
    public synchronized void mark(final int readLimit) {
        this.cBufMark = this.cBuf.position();
        this.bBufMark = this.bBuf.position();
        this.cBuf.mark();
        this.bBuf.mark();
        // It would be nice to be able to use mark & reset on the cBuf and bBuf;
        // however the bBuf is re-used so that won't work
    }

    /**
     * Tests whether this stream supports {@link #mark(int)} and {@link #reset()}.
     *
     * @return always {@code true}.
     */
    @Override
    public boolean markSupported() {
        return true;
    }

    /**
     * Reads the next encoded byte.
     *
     * @return the byte as an int in the range 0 to 255, or EOF when no bytes and no characters remain.
     * @throws IOException if the character sequence cannot be encoded.
     */
    @Override
    public int read() throws IOException {
        for (;;) {
            if (this.bBuf.hasRemaining()) {
                return this.bBuf.get() & 0xFF;
            }
            if (!this.cBuf.hasRemaining()) {
                // Nothing left to encode. The byte buffer is deliberately not re-filled here: an empty fill would move
                // its position back to 0, and a mark taken afterwards would make reset() replay the last chunk.
                return EOF;
            }
            fillBuffer();
        }
    }

    /**
     * Reads up to {@code b.length} encoded bytes into the given array.
     *
     * @param b the destination array.
     * @return the number of bytes read, or EOF when no bytes and no characters remain.
     * @throws IOException if the character sequence cannot be encoded.
     */
    @Override
    public int read(final byte[] b) throws IOException {
        return read(b, 0, b.length);
    }

    /**
     * Reads up to {@code len} encoded bytes into the given array, re-filling the byte buffer from the character sequence as often as
     * needed.
     *
     * @param array the destination array.
     * @param off the index in {@code array} of the first byte to write.
     * @param len the maximum number of bytes to read.
     * @return the number of bytes read, 0 if {@code len} is 0, or EOF when no bytes and no characters remain.
     * @throws IOException if the character sequence cannot be encoded.
     */
    @Override
    public int read(final byte[] array, int off, int len) throws IOException {
        IOUtils.checkFromIndexSize(array, off, len);
        if (len == 0) {
            return 0; // must return 0 for zero length read
        }
        if (!this.bBuf.hasRemaining() && !this.cBuf.hasRemaining()) {
            return EOF;
        }
        int bytesRead = 0;
        while (len > 0) {
            if (this.bBuf.hasRemaining()) {
                final int chunk = Math.min(this.bBuf.remaining(), len);
                this.bBuf.get(array, off, chunk);
                off += chunk;
                len -= chunk;
                bytesRead += chunk;
            } else if (this.cBuf.hasRemaining()) {
                fillBuffer();
            } else {
                // End of input; as in read(), do not re-fill (and thereby rewind) the exhausted byte buffer.
                break;
            }
        }
        return bytesRead == 0 && !this.cBuf.hasRemaining() ? EOF : bytesRead;
    }

    /**
     * Repositions this stream at the position recorded by the last call to {@link #mark(int)}, then records that position as the new
     * mark. If no mark is recorded, only the current position is recorded as the mark.
     *
     * @throws IOException if the character sequence cannot be re-encoded.
     * @throws IllegalStateException if re-encoding does not arrive at the marked character position.
     */
    @Override
    public synchronized void reset() throws IOException {
        //
        // This is not the most efficient implementation, as it re-encodes from the beginning.
        //
        // Since the bBuf is re-used, in general it's necessary to re-encode the data.
        //
        // It should be possible to apply some optimizations however:
        // + use mark/reset on the cBuf and bBuf. This would only work if the buffer had not been (re)filled since
        // the mark. The code would have to catch InvalidMarkException - does not seem possible to check if mark is
        // valid otherwise.
        // + Try saving the state of the cBuf before each fillBuffer; it might be possible to
        // restart from there.
        //
        if (this.cBufMark != NO_MARK) {
            // if cBuf is at 0, we have not started reading anything, so skip re-encoding
            if (this.cBuf.position() != 0) {
                this.charsetEncoder.reset();
                this.cBuf.rewind();
                this.bBuf.rewind();
                this.bBuf.limit(0); // rewind does not clear the buffer
                while (this.cBuf.position() < this.cBufMark) {
                    this.bBuf.rewind(); // empty the buffer (we only refill when empty during normal processing)
                    this.bBuf.limit(0);
                    fillBuffer();
                }
            }
            if (this.cBuf.position() != this.cBufMark) {
                throw new IllegalStateException("Unexpected CharBuffer position: actual=" + cBuf.position() + " " +
                        "expected=" + this.cBufMark);
            }
            // A mark recorded after close() holds the byte buffer capacity as its byte position, which can exceed the
            // limit of the chunk just re-encoded; clamping maps such a mark to the end of that chunk, i.e. end of stream.
            this.bBuf.position(Math.min(this.bBufMark, this.bBuf.limit()));
            this.cBufMark = NO_MARK;
            this.bBufMark = NO_MARK;
        }
        mark(0);
    }

    /**
     * Skips over and discards up to {@code n} encoded bytes.
     * <p>
     * Bytes that are already encoded are skipped by moving the buffer position; when the byte buffer runs dry, more of the character
     * sequence is encoded. Fewer than {@code n} bytes are skipped only when the end of the stream is reached.
     * </p>
     *
     * @param n the number of bytes to skip; values of zero or less skip nothing.
     * @return the number of bytes actually skipped.
     * @throws IOException if the character sequence cannot be encoded.
     */
    @Override
    public long skip(long n) throws IOException {
        long skipped = 0;
        while (n > 0) {
            if (this.bBuf.hasRemaining()) {
                // The result fits in an int because it is bounded by remaining().
                final int chunk = (int) Math.min(n, this.bBuf.remaining());
                this.bBuf.position(this.bBuf.position() + chunk);
                n -= chunk;
                skipped += chunk;
            } else if (this.cBuf.hasRemaining()) {
                // Same refill rule as the read methods: only when the byte buffer is empty and characters remain.
                fillBuffer();
            } else {
                break;
            }
        }
        return skipped;
    }

}