001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.io.output; 018 019import java.io.IOException; 020import java.io.OutputStream; 021import java.io.Writer; 022import java.nio.ByteBuffer; 023import java.nio.CharBuffer; 024import java.nio.charset.Charset; 025import java.nio.charset.CharsetDecoder; 026import java.nio.charset.CoderResult; 027import java.nio.charset.CodingErrorAction; 028import java.nio.charset.StandardCharsets; 029 030import org.apache.commons.io.Charsets; 031import org.apache.commons.io.IOUtils; 032import org.apache.commons.io.build.AbstractStreamBuilder; 033import org.apache.commons.io.charset.CharsetDecoders; 034 035/** 036 * {@link OutputStream} implementation that transforms a byte stream to a character stream using a specified charset encoding and writes the resulting stream to 037 * a {@link Writer}. The stream is transformed using a {@link CharsetDecoder} object, guaranteeing that all charset encodings supported by the JRE are handled 038 * correctly. 039 * <p> 040 * The output of the {@link CharsetDecoder} is buffered using a fixed size buffer. This implies that the data is written to the underlying {@link Writer} in 041 * chunks that are no larger than the size of this buffer. By default, the buffer is flushed only when it overflows or when {@link #flush()} or {@link #close()} 042 * is called. In general there is therefore no need to wrap the underlying {@link Writer} in a {@link java.io.BufferedWriter}. {@link WriterOutputStream} can 043 * also be instructed to flush the buffer after each write operation. In this case, all available data is written immediately to the underlying {@link Writer}, 044 * implying that the current position of the {@link Writer} is correlated to the current position of the {@link WriterOutputStream}. 045 * </p> 046 * <p> 047 * {@link WriterOutputStream} implements the inverse transformation of {@link java.io.OutputStreamWriter}; in the following example, writing to {@code out2} 048 * would have the same result as writing to {@code out} directly (provided that the byte sequence is legal with respect to the charset encoding): 049 * </p> 050 * <p> 051 * To build an instance, use {@link Builder}. 052 * </p> 053 * <pre> 054 * OutputStream out = ... 055 * Charset cs = ... 056 * OutputStreamWriter writer = new OutputStreamWriter(out, cs); 057 * WriterOutputStream out2 = WriterOutputStream.builder() 058 * .setWriter(writer) 059 * .setCharset(cs) 060 * .get(); 061 * </pre> 062 * <p> 063 * {@link WriterOutputStream} implements the same transformation as {@link java.io.InputStreamReader}, except that the control flow is reversed: both classes 064 * transform a byte stream into a character stream, but {@link java.io.InputStreamReader} pulls data from the underlying stream, while 065 * {@link WriterOutputStream} pushes it to the underlying stream. 066 * </p> 067 * <p> 068 * Note that while there are use cases where there is no alternative to using this class, very often the need to use this class is an indication of a flaw in 069 * the design of the code. This class is typically used in situations where an existing API only accepts an {@link OutputStream} object, but where the stream is 070 * known to represent character data that must be decoded for further use. 071 * </p> 072 * <p> 073 * Instances of {@link WriterOutputStream} are not thread safe. 074 * </p> 075 * 076 * @see Builder 077 * @see org.apache.commons.io.input.ReaderInputStream 078 * @since 2.0 079 */ 080public class WriterOutputStream extends OutputStream { 081 082 // @formatter:off 083 /** 084 * Builds a new {@link WriterOutputStream}. 085 * 086 * <p> 087 * For example: 088 * </p> 089 * <pre>{@code 090 * WriterOutputStream s = WriterOutputStream.builder() 091 * .setPath(path) 092 * .setBufferSize(8192) 093 * .setCharset(StandardCharsets.UTF_8) 094 * .setWriteImmediately(false) 095 * .get();} 096 * </pre> 097 * 098 * @see #get() 099 * @since 2.12.0 100 */ 101 // @formatter:on 102 public static class Builder extends AbstractStreamBuilder<WriterOutputStream, Builder> { 103 104 private CharsetDecoder charsetDecoder; 105 private boolean writeImmediately; 106 107 /** 108 * Constructs a new Builder. 109 */ 110 public Builder() { 111 this.charsetDecoder = getCharset().newDecoder(); 112 } 113 114 /** 115 * Builds a new {@link WriterOutputStream}. 116 * <p> 117 * You must set input that supports {@link #getWriter()} on this builder, otherwise, this method throws an exception. 118 * </p> 119 * <p> 120 * This builder use the following aspects: 121 * </p> 122 * <ul> 123 * <li>{@link #getWriter()}</li> 124 * <li>{@link #getBufferSize()}</li> 125 * <li>charsetDecoder</li> 126 * <li>writeImmediately</li> 127 * </ul> 128 * 129 * @return a new instance. 130 * @throws UnsupportedOperationException if the origin cannot provide a Writer. 131 * @see #getWriter() 132 */ 133 @SuppressWarnings("resource") 134 @Override 135 public WriterOutputStream get() throws IOException { 136 return new WriterOutputStream(getWriter(), charsetDecoder, getBufferSize(), writeImmediately); 137 } 138 139 @Override 140 public Builder setCharset(final Charset charset) { 141 super.setCharset(charset); 142 this.charsetDecoder = getCharset().newDecoder(); 143 return this; 144 } 145 146 @Override 147 public Builder setCharset(final String charset) { 148 super.setCharset(charset); 149 this.charsetDecoder = getCharset().newDecoder(); 150 return this; 151 } 152 153 /** 154 * Sets the charset decoder. 155 * 156 * @param charsetDecoder the charset decoder. 157 * @return this 158 */ 159 public Builder setCharsetDecoder(final CharsetDecoder charsetDecoder) { 160 this.charsetDecoder = charsetDecoder != null ? charsetDecoder : getCharsetDefault().newDecoder(); 161 super.setCharset(this.charsetDecoder.charset()); 162 return this; 163 } 164 165 /** 166 * Sets whether the output buffer will be flushed after each write operation ({@code true}), i.e. all available data will be written to the underlying 167 * {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when {@link #flush()} or {@link #close()} 168 * is called. 169 * 170 * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, i.e. all available data will be written to the 171 * underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when 172 * {@link #flush()} or {@link #close()} is called. 173 * @return this 174 */ 175 public Builder setWriteImmediately(final boolean writeImmediately) { 176 this.writeImmediately = writeImmediately; 177 return this; 178 } 179 180 } 181 182 private static final int BUFFER_SIZE = IOUtils.DEFAULT_BUFFER_SIZE; 183 184 /** 185 * Constructs a new {@link Builder}. 186 * 187 * @return a new {@link Builder}. 188 * @since 2.12.0 189 */ 190 public static Builder builder() { 191 return new Builder(); 192 } 193 194 /** 195 * Checks if the JDK in use properly supports the given charset. 196 * 197 * @param charset the charset to check the support for 198 */ 199 private static void checkIbmJdkWithBrokenUTF16(final Charset charset) { 200 if (!StandardCharsets.UTF_16.name().equals(charset.name())) { 201 return; 202 } 203 final String TEST_STRING_2 = "v\u00e9s"; 204 final byte[] bytes = TEST_STRING_2.getBytes(charset); 205 206 final CharsetDecoder charsetDecoder2 = charset.newDecoder(); 207 final ByteBuffer bb2 = ByteBuffer.allocate(16); 208 final CharBuffer cb2 = CharBuffer.allocate(TEST_STRING_2.length()); 209 final int len = bytes.length; 210 for (int i = 0; i < len; i++) { 211 bb2.put(bytes[i]); 212 bb2.flip(); 213 try { 214 charsetDecoder2.decode(bb2, cb2, i == len - 1); 215 } catch (final IllegalArgumentException e) { 216 throw new UnsupportedOperationException("UTF-16 requested when running on an IBM JDK with broken UTF-16 support. " 217 + "Please find a JDK that supports UTF-16 if you intend to use UF-16 with WriterOutputStream"); 218 } 219 bb2.compact(); 220 } 221 cb2.rewind(); 222 if (!TEST_STRING_2.equals(cb2.toString())) { 223 throw new UnsupportedOperationException("UTF-16 requested when running on an IBM JDK with broken UTF-16 support. " 224 + "Please find a JDK that supports UTF-16 if you intend to use UF-16 with WriterOutputStream"); 225 } 226 227 } 228 229 private final Writer writer; 230 private final CharsetDecoder decoder; 231 232 private final boolean writeImmediately; 233 234 /** 235 * ByteBuffer used as input for the decoder. This buffer can be small as it is used only to transfer the received data to the decoder. 236 */ 237 private final ByteBuffer decoderIn = ByteBuffer.allocate(128); 238 239 /** 240 * CharBuffer used as output for the decoder. It should be somewhat larger as we write from this buffer to the underlying Writer. 241 */ 242 private final CharBuffer decoderOut; 243 244 /** 245 * Constructs a new {@link WriterOutputStream} that uses the default character encoding and with a default output buffer size of {@value #BUFFER_SIZE} 246 * characters. The output buffer will only be flushed when it overflows or when {@link #flush()} or {@link #close()} is called. 247 * 248 * @param writer the target {@link Writer} 249 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 250 */ 251 @Deprecated 252 public WriterOutputStream(final Writer writer) { 253 this(writer, Charset.defaultCharset(), BUFFER_SIZE, false); 254 } 255 256 /** 257 * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed 258 * when it overflows or when {@link #flush()} or {@link #close()} is called. 259 * 260 * @param writer the target {@link Writer} 261 * @param charset the charset encoding 262 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 263 */ 264 @Deprecated 265 public WriterOutputStream(final Writer writer, final Charset charset) { 266 this(writer, charset, BUFFER_SIZE, false); 267 } 268 269 /** 270 * Constructs a new {@link WriterOutputStream}. 271 * 272 * @param writer the target {@link Writer} 273 * @param charset the charset encoding 274 * @param bufferSize the size of the output buffer in number of characters 275 * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, i.e. all available data will be written to the 276 * underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when 277 * {@link #flush()} or {@link #close()} is called. 278 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 279 */ 280 @Deprecated 281 public WriterOutputStream(final Writer writer, final Charset charset, final int bufferSize, final boolean writeImmediately) { 282 // @formatter:off 283 this(writer, 284 Charsets.toCharset(charset).newDecoder() 285 .onMalformedInput(CodingErrorAction.REPLACE) 286 .onUnmappableCharacter(CodingErrorAction.REPLACE) 287 .replaceWith("?"), 288 bufferSize, 289 writeImmediately); 290 // @formatter:on 291 } 292 293 /** 294 * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed 295 * when it overflows or when {@link #flush()} or {@link #close()} is called. 296 * 297 * @param writer the target {@link Writer} 298 * @param decoder the charset decoder 299 * @since 2.1 300 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 301 */ 302 @Deprecated 303 public WriterOutputStream(final Writer writer, final CharsetDecoder decoder) { 304 this(writer, decoder, BUFFER_SIZE, false); 305 } 306 307 /** 308 * Constructs a new {@link WriterOutputStream}. 309 * 310 * @param writer the target {@link Writer} 311 * @param decoder the charset decoder 312 * @param bufferSize the size of the output buffer in number of characters 313 * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, i.e. all available data will be written to the 314 * underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when 315 * {@link #flush()} or {@link #close()} is called. 316 * @since 2.1 317 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 318 */ 319 @Deprecated 320 public WriterOutputStream(final Writer writer, final CharsetDecoder decoder, final int bufferSize, final boolean writeImmediately) { 321 checkIbmJdkWithBrokenUTF16(CharsetDecoders.toCharsetDecoder(decoder).charset()); 322 this.writer = writer; 323 this.decoder = CharsetDecoders.toCharsetDecoder(decoder); 324 this.writeImmediately = writeImmediately; 325 this.decoderOut = CharBuffer.allocate(bufferSize); 326 } 327 328 /** 329 * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed 330 * when it overflows or when {@link #flush()} or {@link #close()} is called. 331 * 332 * @param writer the target {@link Writer} 333 * @param charsetName the name of the charset encoding 334 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 335 */ 336 @Deprecated 337 public WriterOutputStream(final Writer writer, final String charsetName) { 338 this(writer, charsetName, BUFFER_SIZE, false); 339 } 340 341 /** 342 * Constructs a new {@link WriterOutputStream}. 343 * 344 * @param writer the target {@link Writer} 345 * @param charsetName the name of the charset encoding 346 * @param bufferSize the size of the output buffer in number of characters 347 * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, i.e. all available data will be written to the 348 * underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when 349 * {@link #flush()} or {@link #close()} is called. 350 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 351 */ 352 @Deprecated 353 public WriterOutputStream(final Writer writer, final String charsetName, final int bufferSize, final boolean writeImmediately) { 354 this(writer, Charsets.toCharset(charsetName), bufferSize, writeImmediately); 355 } 356 357 /** 358 * Close the stream. Any remaining content accumulated in the output buffer will be written to the underlying {@link Writer}. After that 359 * {@link Writer#close()} will be called. 360 * 361 * @throws IOException if an I/O error occurs. 362 */ 363 @Override 364 public void close() throws IOException { 365 processInput(true); 366 flushOutput(); 367 writer.close(); 368 } 369 370 /** 371 * Flush the stream. Any remaining content accumulated in the output buffer will be written to the underlying {@link Writer}. After that 372 * {@link Writer#flush()} will be called. 373 * 374 * @throws IOException if an I/O error occurs. 375 */ 376 @Override 377 public void flush() throws IOException { 378 flushOutput(); 379 writer.flush(); 380 } 381 382 /** 383 * Flush the output. 384 * 385 * @throws IOException if an I/O error occurs. 386 */ 387 private void flushOutput() throws IOException { 388 if (decoderOut.position() > 0) { 389 writer.write(decoderOut.array(), 0, decoderOut.position()); 390 decoderOut.rewind(); 391 } 392 } 393 394 /** 395 * Decode the contents of the input ByteBuffer into a CharBuffer. 396 * 397 * @param endOfInput indicates end of input 398 * @throws IOException if an I/O error occurs. 399 */ 400 private void processInput(final boolean endOfInput) throws IOException { 401 // Prepare decoderIn for reading 402 decoderIn.flip(); 403 CoderResult coderResult; 404 while (true) { 405 coderResult = decoder.decode(decoderIn, decoderOut, endOfInput); 406 if (coderResult.isOverflow()) { 407 flushOutput(); 408 } else if (coderResult.isUnderflow()) { 409 break; 410 } else { 411 // The decoder is configured to replace malformed input and unmappable characters, 412 // so we should not get here. 413 throw new IOException("Unexpected coder result"); 414 } 415 } 416 // Discard the bytes that have been read 417 decoderIn.compact(); 418 } 419 420 /** 421 * Write bytes from the specified byte array to the stream. 422 * 423 * @param b the byte array containing the bytes to write 424 * @throws IOException if an I/O error occurs. 425 */ 426 @Override 427 public void write(final byte[] b) throws IOException { 428 write(b, 0, b.length); 429 } 430 431 /** 432 * Write bytes from the specified byte array to the stream. 433 * 434 * @param b the byte array containing the bytes to write 435 * @param off the start offset in the byte array 436 * @param len the number of bytes to write 437 * @throws IOException if an I/O error occurs. 438 */ 439 @Override 440 public void write(final byte[] b, int off, int len) throws IOException { 441 while (len > 0) { 442 final int c = Math.min(len, decoderIn.remaining()); 443 decoderIn.put(b, off, c); 444 processInput(false); 445 len -= c; 446 off += c; 447 } 448 if (writeImmediately) { 449 flushOutput(); 450 } 451 } 452 453 /** 454 * Write a single byte to the stream. 455 * 456 * @param b the byte to write 457 * @throws IOException if an I/O error occurs. 458 */ 459 @Override 460 public void write(final int b) throws IOException { 461 write(new byte[] { (byte) b }, 0, 1); 462 } 463}