001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * https://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018package org.apache.commons.io.input; 019 020import static org.apache.commons.io.IOUtils.EOF; 021 022import java.io.IOException; 023import java.io.InputStream; 024import java.nio.ByteBuffer; 025import java.nio.CharBuffer; 026import java.nio.charset.CharacterCodingException; 027import java.nio.charset.Charset; 028import java.nio.charset.CharsetEncoder; 029import java.nio.charset.CoderResult; 030import java.nio.charset.CodingErrorAction; 031import java.util.Objects; 032 033import org.apache.commons.io.Charsets; 034import org.apache.commons.io.IOUtils; 035import org.apache.commons.io.build.AbstractStreamBuilder; 036import org.apache.commons.io.charset.CharsetEncoders; 037import org.apache.commons.io.function.Uncheck; 038 039/** 040 * Implements an {@link InputStream} to read bytes from String, StringBuffer, StringBuilder or CharBuffer, 041 * encoded using the specified Charset. The Charset defaults to Charset.defaultCharset(). 042 * <p> 043 * <strong>Note:</strong> Supports {@link #mark(int)} and {@link #reset()}. 044 * </p> 045 * <p> 046 * To build an instance, use {@link Builder}. 047 * </p> 048 * 049 * @see Builder 050 * @since 2.2 051 */ 052public class CharSequenceInputStream extends InputStream { 053 054 //@formatter:off 055 /** 056 * Builds a new {@link CharSequenceInputStream}. 057 * 058 * <p> 059 * For example: 060 * </p> 061 * <h2>Using a Charset</h2> 062 * <pre>{@code 063 * CharSequenceInputStream s = CharSequenceInputStream.builder() 064 * .setBufferSize(8192) 065 * .setCharSequence("String") 066 * .setCharset(Charset.defaultCharset()) 067 * .get();} 068 * </pre> 069 * <h2>Using a CharsetEncoder</h2> 070 * <pre>{@code 071 * CharSequenceInputStream s = CharSequenceInputStream.builder() 072 * .setBufferSize(8192) 073 * .setCharSequence("String") 074 * .setCharsetEncoder(Charset.defaultCharset().newEncoder() 075 * .onMalformedInput(CodingErrorAction.REPLACE) 076 * .onUnmappableCharacter(CodingErrorAction.REPLACE)) 077 * .get();} 078 * </pre> 079 * 080 * @see #get() 081 * @since 2.13.0 082 */ 083 //@formatter:on 084 public static class Builder extends AbstractStreamBuilder<CharSequenceInputStream, Builder> { 085 086 private CharsetEncoder charsetEncoder = newEncoder(getCharset()); 087 088 /** 089 * Constructs a new builder of {@link CharSequenceInputStream}. 090 */ 091 public Builder() { 092 // empty 093 } 094 095 /** 096 * Builds a new {@link CharSequenceInputStream}. 097 * <p> 098 * You must set an aspect that supports {@link #getCharSequence()}, otherwise, this method throws an exception. 099 * </p> 100 * <p> 101 * This builder uses the following aspects: 102 * </p> 103 * <ul> 104 * <li>{@link #getCharSequence()} gets the target aspect.</li> 105 * <li>{@link #getBufferSize()}</li> 106 * <li>{@link CharsetEncoder}</li> 107 * </ul> 108 * 109 * @return a new instance. 110 * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character. 111 * @see #getUnchecked() 112 */ 113 @Override 114 public CharSequenceInputStream get() { 115 return Uncheck.get(() -> new CharSequenceInputStream(this)); 116 } 117 118 CharsetEncoder getCharsetEncoder() { 119 return charsetEncoder; 120 } 121 122 @Override 123 public Builder setCharset(final Charset charset) { 124 super.setCharset(charset); 125 charsetEncoder = newEncoder(getCharset()); 126 return this; 127 } 128 129 /** 130 * Sets the charset encoder. Assumes that the caller has configured the encoder. 131 * 132 * @param newEncoder the charset encoder. 133 * @return {@code this} instance. 134 * @since 2.13.0 135 */ 136 public Builder setCharsetEncoder(final CharsetEncoder newEncoder) { 137 charsetEncoder = CharsetEncoders.toCharsetEncoder(newEncoder, () -> newEncoder(getCharsetDefault())); 138 super.setCharset(charsetEncoder.charset()); 139 return this; 140 } 141 142 } 143 144 private static final int NO_MARK = -1; 145 146 /** 147 * Constructs a new {@link Builder}. 148 * 149 * @return a new {@link Builder}. 150 * @since 2.12.0 151 */ 152 public static Builder builder() { 153 return new Builder(); 154 } 155 156 private static CharsetEncoder newEncoder(final Charset charset) { 157 // @formatter:off 158 return Charsets.toCharset(charset).newEncoder() 159 .onMalformedInput(CodingErrorAction.REPLACE) 160 .onUnmappableCharacter(CodingErrorAction.REPLACE); 161 // @formatter:on 162 } 163 164 private final ByteBuffer bBuf; 165 private int bBufMark; // position in bBuf 166 private final CharBuffer cBuf; 167 private int cBufMark; // position in cBuf 168 private final CharsetEncoder charsetEncoder; 169 170 private CharSequenceInputStream(final Builder builder) { 171 this.charsetEncoder = builder.charsetEncoder; 172 // Ensure that buffer is long enough to hold a complete character 173 this.bBuf = ByteBuffer.allocate(ReaderInputStream.checkMinBufferSize(builder.charsetEncoder, builder.getBufferSize())); 174 this.bBuf.flip(); 175 this.cBuf = CharBuffer.wrap(Uncheck.get(() -> builder.getCharSequence())); 176 this.cBufMark = NO_MARK; 177 this.bBufMark = NO_MARK; 178 try { 179 fillBuffer(); 180 } catch (final CharacterCodingException ex) { 181 // Reset everything without filling the buffer 182 // so the same exception can be thrown again later. 183 this.bBuf.clear(); 184 this.bBuf.flip(); 185 this.cBuf.rewind(); 186 } 187 } 188 189 /** 190 * Constructs a new instance with a buffer size of {@link IOUtils#DEFAULT_BUFFER_SIZE}. 191 * 192 * @param cs the input character sequence. 193 * @param charset the character set name to use. 194 * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character. 195 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 196 */ 197 @Deprecated 198 public CharSequenceInputStream(final CharSequence cs, final Charset charset) { 199 this(cs, charset, IOUtils.DEFAULT_BUFFER_SIZE); 200 } 201 202 /** 203 * Constructs a new instance. 204 * 205 * @param cs the input character sequence. 206 * @param charset the character set name to use, null maps to the default Charset. 207 * @param bufferSize the buffer size to use. 208 * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character. 209 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 210 */ 211 @Deprecated 212 public CharSequenceInputStream(final CharSequence cs, final Charset charset, final int bufferSize) { 213 this(builder().setCharSequence(cs).setCharset(charset).setBufferSize(bufferSize)); 214 } 215 216 /** 217 * Constructs a new instance with a buffer size of {@link IOUtils#DEFAULT_BUFFER_SIZE}. 218 * 219 * @param cs the input character sequence. 220 * @param charset the character set name to use. 221 * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character. 222 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 223 */ 224 @Deprecated 225 public CharSequenceInputStream(final CharSequence cs, final String charset) { 226 this(cs, charset, IOUtils.DEFAULT_BUFFER_SIZE); 227 } 228 229 /** 230 * Constructs a new instance. 231 * 232 * @param cs the input character sequence. 233 * @param charset the character set name to use, null maps to the default Charset. 234 * @param bufferSize the buffer size to use. 235 * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character. 236 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 237 */ 238 @Deprecated 239 public CharSequenceInputStream(final CharSequence cs, final String charset, final int bufferSize) { 240 this(cs, Charsets.toCharset(charset), bufferSize); 241 } 242 243 /** 244 * Gets a lower bound on the number of bytes remaining in the byte stream. 245 * 246 * @return the count of bytes that can be read without blocking (or returning EOF). 247 * @throws IOException if an error occurs (probably not possible). 248 */ 249 @Override 250 public int available() throws IOException { 251 return this.bBuf.remaining(); 252 } 253 254 @Override 255 public void close() throws IOException { 256 bBuf.position(bBuf.limit()); 257 } 258 259 /** 260 * Fills the byte output buffer from the input char buffer. 261 * 262 * @throws CharacterCodingException 263 * an error encoding data. 264 */ 265 private void fillBuffer() throws CharacterCodingException { 266 this.bBuf.compact(); 267 final CoderResult result = this.charsetEncoder.encode(this.cBuf, this.bBuf, true); 268 if (result.isError()) { 269 result.throwException(); 270 } 271 this.bBuf.flip(); 272 } 273 274 /** 275 * Gets the CharsetEncoder. 276 * 277 * @return the CharsetEncoder. 278 */ 279 CharsetEncoder getCharsetEncoder() { 280 return charsetEncoder; 281 } 282 283 /** 284 * {@inheritDoc} 285 * @param readLimit max read limit (ignored). 286 */ 287 @Override 288 public synchronized void mark(final int readLimit) { 289 this.cBufMark = this.cBuf.position(); 290 this.bBufMark = this.bBuf.position(); 291 this.cBuf.mark(); 292 this.bBuf.mark(); 293 // It would be nice to be able to use mark & reset on the cBuf and bBuf; 294 // however the bBuf is re-used so that won't work 295 } 296 297 @Override 298 public boolean markSupported() { 299 return true; 300 } 301 302 @Override 303 public int read() throws IOException { 304 for (;;) { 305 if (this.bBuf.hasRemaining()) { 306 return this.bBuf.get() & 0xFF; 307 } 308 fillBuffer(); 309 if (!this.bBuf.hasRemaining() && !this.cBuf.hasRemaining()) { 310 return EOF; 311 } 312 } 313 } 314 315 @Override 316 public int read(final byte[] b) throws IOException { 317 return read(b, 0, b.length); 318 } 319 320 @Override 321 public int read(final byte[] array, int off, int len) throws IOException { 322 Objects.requireNonNull(array, "array"); 323 if (len < 0 || off + len > array.length) { 324 throw new IndexOutOfBoundsException("Array Size=" + array.length + ", offset=" + off + ", length=" + len); 325 } 326 if (len == 0) { 327 return 0; // must return 0 for zero length read 328 } 329 if (!this.bBuf.hasRemaining() && !this.cBuf.hasRemaining()) { 330 return EOF; 331 } 332 int bytesRead = 0; 333 while (len > 0) { 334 if (this.bBuf.hasRemaining()) { 335 final int chunk = Math.min(this.bBuf.remaining(), len); 336 this.bBuf.get(array, off, chunk); 337 off += chunk; 338 len -= chunk; 339 bytesRead += chunk; 340 } else { 341 fillBuffer(); 342 if (!this.bBuf.hasRemaining() && !this.cBuf.hasRemaining()) { 343 break; 344 } 345 } 346 } 347 return bytesRead == 0 && !this.cBuf.hasRemaining() ? EOF : bytesRead; 348 } 349 350 @Override 351 public synchronized void reset() throws IOException { 352 // 353 // This is not the most efficient implementation, as it re-encodes from the beginning. 354 // 355 // Since the bBuf is re-used, in general it's necessary to re-encode the data. 356 // 357 // It should be possible to apply some optimizations however: 358 // + use mark/reset on the cBuf and bBuf. This would only work if the buffer had not been (re)filled since 359 // the mark. The code would have to catch InvalidMarkException - does not seem possible to check if mark is 360 // valid otherwise. + Try saving the state of the cBuf before each fillBuffer; it might be possible to 361 // restart from there. 362 // 363 if (this.cBufMark != NO_MARK) { 364 // if cBuf is at 0, we have not started reading anything, so skip re-encoding 365 if (this.cBuf.position() != 0) { 366 this.charsetEncoder.reset(); 367 this.cBuf.rewind(); 368 this.bBuf.rewind(); 369 this.bBuf.limit(0); // rewind does not clear the buffer 370 while (this.cBuf.position() < this.cBufMark) { 371 this.bBuf.rewind(); // empty the buffer (we only refill when empty during normal processing) 372 this.bBuf.limit(0); 373 fillBuffer(); 374 } 375 } 376 if (this.cBuf.position() != this.cBufMark) { 377 throw new IllegalStateException("Unexpected CharBuffer position: actual=" + cBuf.position() + " " + 378 "expected=" + this.cBufMark); 379 } 380 this.bBuf.position(this.bBufMark); 381 this.cBufMark = NO_MARK; 382 this.bBufMark = NO_MARK; 383 } 384 mark(0); 385 } 386 387 @Override 388 public long skip(long n) throws IOException { 389 // 390 // This could be made more efficient by using position to skip within the current buffer. 391 // 392 long skipped = 0; 393 while (n > 0 && available() > 0) { 394 this.read(); 395 n--; 396 skipped++; 397 } 398 return skipped; 399 } 400 401}