1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * https://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.io.output;
18
19 import java.io.BufferedWriter;
20 import java.io.IOException;
21 import java.io.InputStreamReader;
22 import java.io.OutputStream;
23 import java.io.OutputStreamWriter;
24 import java.io.Writer;
25 import java.nio.ByteBuffer;
26 import java.nio.CharBuffer;
27 import java.nio.charset.Charset;
28 import java.nio.charset.CharsetDecoder;
29 import java.nio.charset.CoderResult;
30 import java.nio.charset.CodingErrorAction;
31 import java.nio.charset.StandardCharsets;
32
33 import org.apache.commons.io.Charsets;
34 import org.apache.commons.io.IOUtils;
35 import org.apache.commons.io.build.AbstractStreamBuilder;
36 import org.apache.commons.io.charset.CharsetDecoders;
37
38 /**
39 * {@link OutputStream} implementation that transforms a byte stream to a character stream using a specified charset encoding and writes the resulting stream to
40 * a {@link Writer}. The stream is transformed using a {@link CharsetDecoder} object, guaranteeing that all charset encodings supported by the JRE are handled
41 * correctly.
42 * <p>
43 * The output of the {@link CharsetDecoder} is buffered using a fixed size buffer. This implies that the data is written to the underlying {@link Writer} in
44 * chunks that are no larger than the size of this buffer. By default, the buffer is flushed only when it overflows or when {@link #flush()} or {@link #close()}
45 * is called. In general there is therefore no need to wrap the underlying {@link Writer} in a {@link BufferedWriter}. {@link WriterOutputStream} can
46 * also be instructed to flush the buffer after each write operation. In this case, all available data is written immediately to the underlying {@link Writer},
47 * implying that the current position of the {@link Writer} is correlated to the current position of the {@link WriterOutputStream}.
48 * </p>
49 * <p>
50 * {@link WriterOutputStream} implements the inverse transformation of {@link OutputStreamWriter}; in the following example, writing to {@code out2}
51 * would have the same result as writing to {@code out} directly (provided that the byte sequence is legal with respect to the charset encoding):
52 * </p>
53 * <p>
54 * To build an instance, use {@link Builder}.
55 * </p>
56 * <pre>
57 * OutputStream out = ...
58 * Charset cs = ...
59 * OutputStreamWriter writer = new OutputStreamWriter(out, cs);
60 * WriterOutputStream out2 = WriterOutputStream.builder()
61 * .setWriter(writer)
62 * .setCharset(cs)
63 * .get();
64 * </pre>
65 * <p>
66 * {@link WriterOutputStream} implements the same transformation as {@link InputStreamReader}, except that the control flow is reversed: both classes
67 * transform a byte stream into a character stream, but {@link InputStreamReader} pulls data from the underlying stream, while
68 * {@link WriterOutputStream} pushes it to the underlying stream.
69 * </p>
70 * <p>
71 * Note that while there are use cases where there is no alternative to using this class, very often the need to use this class is an indication of a flaw in
72 * the design of the code. This class is typically used in situations where an existing API only accepts an {@link OutputStream} object, but where the stream is
73 * known to represent character data that must be decoded for further use.
74 * </p>
75 * <p>
76 * Instances of {@link WriterOutputStream} are not thread safe.
77 * </p>
78 *
79 * @see Builder
80 * @see org.apache.commons.io.input.ReaderInputStream
81 * @since 2.0
82 */
83 public class WriterOutputStream extends OutputStream {
84
85 // @formatter:off
86 /**
87 * Builds a new {@link WriterOutputStream}.
88 *
89 * <p>
90 * For example:
91 * </p>
92 * <pre>{@code
93 * WriterOutputStream s = WriterOutputStream.builder()
94 * .setPath(path)
95 * .setBufferSize(8192)
96 * .setCharset(StandardCharsets.UTF_8)
97 * .setWriteImmediately(false)
98 * .get();}
99 * </pre>
100 *
101 * @see #get()
102 * @since 2.12.0
103 */
104 // @formatter:on
105 public static class Builder extends AbstractStreamBuilder<WriterOutputStream, Builder> {
106
107 private CharsetDecoder charsetDecoder;
108 private boolean writeImmediately;
109
110 /**
111 * Constructs a new builder of {@link WriterOutputStream}.
112 */
113 public Builder() {
114 this.charsetDecoder = getCharset().newDecoder();
115 }
116
117 /**
118 * Builds a new {@link WriterOutputStream}.
119 * <p>
120 * You must set an aspect that supports {@link #getWriter()} on this builder, otherwise, this method throws an exception.
121 * </p>
122 * <p>
123 * This builder uses the following aspects:
124 * </p>
125 * <ul>
126 * <li>{@link #getWriter()}</li>
127 * <li>{@link #getBufferSize()}</li>
128 * <li>charsetDecoder</li>
129 * <li>writeImmediately</li>
130 * </ul>
131 *
132 * @return a new instance.
133 * @throws UnsupportedOperationException if the origin cannot provide a {@link Writer}.
134 * @throws IOException if an I/O error occurs converting to an {@link Writer} using {@link #getWriter()}.
135 * @see #getWriter()
136 * @see #getUnchecked()
137 */
138 @Override
139 public WriterOutputStream get() throws IOException {
140 return new WriterOutputStream(this);
141 }
142
143 @Override
144 public Builder setCharset(final Charset charset) {
145 super.setCharset(charset);
146 this.charsetDecoder = getCharset().newDecoder();
147 return this;
148 }
149
150 @Override
151 public Builder setCharset(final String charset) {
152 super.setCharset(charset);
153 this.charsetDecoder = getCharset().newDecoder();
154 return this;
155 }
156
157 /**
158 * Sets the charset decoder.
159 *
160 * @param charsetDecoder the charset decoder.
161 * @return {@code this} instance.
162 */
163 public Builder setCharsetDecoder(final CharsetDecoder charsetDecoder) {
164 this.charsetDecoder = charsetDecoder != null ? charsetDecoder : getCharsetDefault().newDecoder();
165 super.setCharset(this.charsetDecoder.charset());
166 return this;
167 }
168
169 /**
170 * Sets whether the output buffer will be flushed after each write operation ({@code true}), meaning all available data will be written to the
171 * underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when {@link #flush()} or
172 * {@link #close()} is called.
173 *
174 * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, meaning all available data will be written to
175 * the underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
176 * {@link #flush()} or {@link #close()} is called.
177 * @return {@code this} instance.
178 */
179 public Builder setWriteImmediately(final boolean writeImmediately) {
180 this.writeImmediately = writeImmediately;
181 return this;
182 }
183
184 }
185
186 private static final int BUFFER_SIZE = IOUtils.DEFAULT_BUFFER_SIZE;
187
188 /**
189 * Constructs a new {@link Builder}.
190 *
191 * @return a new {@link Builder}.
192 * @since 2.12.0
193 */
194 public static Builder builder() {
195 return new Builder();
196 }
197
198 /**
199 * Checks if the JDK in use properly supports the given charset.
200 *
201 * @param charset the charset to check the support for
202 */
203 private static void checkIbmJdkWithBrokenUTF16(final Charset charset) {
204 if (!StandardCharsets.UTF_16.name().equals(charset.name())) {
205 return;
206 }
207 final String TEST_STRING_2 = "v\u00e9s";
208 final byte[] bytes = TEST_STRING_2.getBytes(charset);
209
210 final CharsetDecoder charsetDecoder2 = charset.newDecoder();
211 final ByteBuffer bb2 = ByteBuffer.allocate(16);
212 final CharBuffer cb2 = CharBuffer.allocate(TEST_STRING_2.length());
213 final int len = bytes.length;
214 for (int i = 0; i < len; i++) {
215 bb2.put(bytes[i]);
216 bb2.flip();
217 try {
218 charsetDecoder2.decode(bb2, cb2, i == len - 1);
219 } catch (final IllegalArgumentException e) {
220 throw new UnsupportedOperationException("UTF-16 requested when running on an IBM JDK with broken UTF-16 support. "
221 + "Please find a JDK that supports UTF-16 if you intend to use UF-16 with WriterOutputStream");
222 }
223 bb2.compact();
224 }
225 cb2.rewind();
226 if (!TEST_STRING_2.equals(cb2.toString())) {
227 throw new UnsupportedOperationException("UTF-16 requested when running on an IBM JDK with broken UTF-16 support. "
228 + "Please find a JDK that supports UTF-16 if you intend to use UF-16 with WriterOutputStream");
229 }
230
231 }
232
233 private final Writer writer;
234 private final CharsetDecoder decoder;
235
236 private final boolean writeImmediately;
237
238 /**
239 * ByteBuffer used as input for the decoder. This buffer can be small as it is used only to transfer the received data to the decoder.
240 */
241 private final ByteBuffer decoderIn = ByteBuffer.allocate(128);
242
243 /**
244 * CharBuffer used as output for the decoder. It should be somewhat larger as we write from this buffer to the underlying Writer.
245 */
246 private final CharBuffer decoderOut;
247
248 @SuppressWarnings("resource") // caller closes.
249 private WriterOutputStream(final Builder builder) throws IOException {
250 this(builder.getWriter(), builder.charsetDecoder, builder.getBufferSize(), builder.writeImmediately);
251 }
252
253 /**
254 * Constructs a new {@link WriterOutputStream} that uses the virtual machine's {@link Charset#defaultCharset() default charset} and with a default output
255 * buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed when it overflows or when {@link #flush()} or {@link #close()} is
256 * called.
257 *
258 * @param writer the target {@link Writer}
259 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
260 */
261 @Deprecated
262 public WriterOutputStream(final Writer writer) {
263 this(writer, Charset.defaultCharset(), BUFFER_SIZE, false);
264 }
265
266 /**
267 * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed
268 * when it overflows or when {@link #flush()} or {@link #close()} is called.
269 *
270 * @param writer the target {@link Writer}
271 * @param charset the charset encoding
272 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
273 */
274 @Deprecated
275 public WriterOutputStream(final Writer writer, final Charset charset) {
276 this(writer, charset, BUFFER_SIZE, false);
277 }
278
279 /**
280 * Constructs a new {@link WriterOutputStream}.
281 *
282 * @param writer the target {@link Writer}
283 * @param charset the charset encoding
284 * @param bufferSize the size of the output buffer in number of characters
285 * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, meaning all available data will be written to the
286 * underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
287 * {@link #flush()} or {@link #close()} is called.
288 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
289 */
290 @Deprecated
291 public WriterOutputStream(final Writer writer, final Charset charset, final int bufferSize, final boolean writeImmediately) {
292 // @formatter:off
293 this(writer,
294 Charsets.toCharset(charset).newDecoder()
295 .onMalformedInput(CodingErrorAction.REPLACE)
296 .onUnmappableCharacter(CodingErrorAction.REPLACE)
297 .replaceWith("?"),
298 bufferSize,
299 writeImmediately);
300 // @formatter:on
301 }
302
303 /**
304 * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed
305 * when it overflows or when {@link #flush()} or {@link #close()} is called.
306 *
307 * @param writer the target {@link Writer}
308 * @param decoder the charset decoder
309 * @since 2.1
310 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
311 */
312 @Deprecated
313 public WriterOutputStream(final Writer writer, final CharsetDecoder decoder) {
314 this(writer, decoder, BUFFER_SIZE, false);
315 }
316
317 /**
318 * Constructs a new {@link WriterOutputStream}.
319 *
320 * @param writer the target {@link Writer}
321 * @param decoder the charset decoder
322 * @param bufferSize the size of the output buffer in number of characters
323 * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, meaning all available data will be written to the
324 * underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
325 * {@link #flush()} or {@link #close()} is called.
326 * @since 2.1
327 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
328 */
329 @Deprecated
330 public WriterOutputStream(final Writer writer, final CharsetDecoder decoder, final int bufferSize, final boolean writeImmediately) {
331 checkIbmJdkWithBrokenUTF16(CharsetDecoders.toCharsetDecoder(decoder).charset());
332 this.writer = writer;
333 this.decoder = CharsetDecoders.toCharsetDecoder(decoder);
334 this.writeImmediately = writeImmediately;
335 this.decoderOut = CharBuffer.allocate(bufferSize);
336 }
337
338 /**
339 * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed
340 * when it overflows or when {@link #flush()} or {@link #close()} is called.
341 *
342 * @param writer the target {@link Writer}
343 * @param charsetName the name of the charset encoding
344 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
345 */
346 @Deprecated
347 public WriterOutputStream(final Writer writer, final String charsetName) {
348 this(writer, charsetName, BUFFER_SIZE, false);
349 }
350
351 /**
352 * Constructs a new {@link WriterOutputStream}.
353 *
354 * @param writer the target {@link Writer}
355 * @param charsetName the name of the charset encoding
356 * @param bufferSize the size of the output buffer in number of characters
357 * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, meaning all available data will be written to the
358 * underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
359 * {@link #flush()} or {@link #close()} is called.
360 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
361 */
362 @Deprecated
363 public WriterOutputStream(final Writer writer, final String charsetName, final int bufferSize, final boolean writeImmediately) {
364 this(writer, Charsets.toCharset(charsetName), bufferSize, writeImmediately);
365 }
366
367 /**
368 * Close the stream. Any remaining content accumulated in the output buffer will be written to the underlying {@link Writer}. After that
369 * {@link Writer#close()} will be called.
370 *
371 * @throws IOException if an I/O error occurs.
372 */
373 @Override
374 public void close() throws IOException {
375 processInput(true);
376 flushOutput();
377 writer.close();
378 }
379
380 /**
381 * Flush the stream. Any remaining content accumulated in the output buffer will be written to the underlying {@link Writer}. After that
382 * {@link Writer#flush()} will be called.
383 *
384 * @throws IOException if an I/O error occurs.
385 */
386 @Override
387 public void flush() throws IOException {
388 flushOutput();
389 writer.flush();
390 }
391
392 /**
393 * Flush the output.
394 *
395 * @throws IOException if an I/O error occurs.
396 */
397 private void flushOutput() throws IOException {
398 if (decoderOut.position() > 0) {
399 writer.write(decoderOut.array(), 0, decoderOut.position());
400 decoderOut.rewind();
401 }
402 }
403
404 /**
405 * Decode the contents of the input ByteBuffer into a CharBuffer.
406 *
407 * @param endOfInput indicates end of input
408 * @throws IOException if an I/O error occurs.
409 */
410 private void processInput(final boolean endOfInput) throws IOException {
411 // Prepare decoderIn for reading
412 decoderIn.flip();
413 CoderResult coderResult;
414 while (true) {
415 coderResult = decoder.decode(decoderIn, decoderOut, endOfInput);
416 if (coderResult.isOverflow()) {
417 flushOutput();
418 } else if (coderResult.isUnderflow()) {
419 break;
420 } else {
421 // The decoder is configured to replace malformed input and unmappable characters,
422 // so we should not get here.
423 throw new IOException("Unexpected coder result");
424 }
425 }
426 // Discard the bytes that have been read
427 decoderIn.compact();
428 }
429
430 /**
431 * Writes bytes from the specified byte array to the stream.
432 *
433 * @param b the byte array containing the bytes to write
434 * @throws IOException if an I/O error occurs.
435 */
436 @Override
437 public void write(final byte[] b) throws IOException {
438 write(b, 0, b.length);
439 }
440
441 /**
442 * Writes bytes from the specified byte array to the stream.
443 *
444 * @param b the byte array containing the bytes to write
445 * @param off the start offset in the byte array
446 * @param len the number of bytes to write
447 * @throws IOException if an I/O error occurs.
448 */
449 @Override
450 public void write(final byte[] b, int off, int len) throws IOException {
451 while (len > 0) {
452 final int c = Math.min(len, decoderIn.remaining());
453 decoderIn.put(b, off, c);
454 processInput(false);
455 len -= c;
456 off += c;
457 }
458 if (writeImmediately) {
459 flushOutput();
460 }
461 }
462
463 /**
464 * Writes a single byte to the stream.
465 *
466 * @param b the byte to write
467 * @throws IOException if an I/O error occurs.
468 */
469 @Override
470 public void write(final int b) throws IOException {
471 write(new byte[] { (byte) b }, 0, 1);
472 }
473 }