View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.io.output;
18  
19  import java.io.BufferedWriter;
20  import java.io.IOException;
21  import java.io.InputStreamReader;
22  import java.io.OutputStream;
23  import java.io.OutputStreamWriter;
24  import java.io.Writer;
25  import java.nio.ByteBuffer;
26  import java.nio.CharBuffer;
27  import java.nio.charset.Charset;
28  import java.nio.charset.CharsetDecoder;
29  import java.nio.charset.CoderResult;
30  import java.nio.charset.CodingErrorAction;
31  import java.nio.charset.StandardCharsets;
32  
33  import org.apache.commons.io.Charsets;
34  import org.apache.commons.io.IOUtils;
35  import org.apache.commons.io.build.AbstractStreamBuilder;
36  import org.apache.commons.io.charset.CharsetDecoders;
37  
38  /**
39   * {@link OutputStream} implementation that transforms a byte stream to a character stream using a specified charset encoding and writes the resulting stream to
40   * a {@link Writer}. The stream is transformed using a {@link CharsetDecoder} object, guaranteeing that all charset encodings supported by the JRE are handled
41   * correctly.
42   * <p>
43   * The output of the {@link CharsetDecoder} is buffered using a fixed size buffer. This implies that the data is written to the underlying {@link Writer} in
44   * chunks that are no larger than the size of this buffer. By default, the buffer is flushed only when it overflows or when {@link #flush()} or {@link #close()}
45   * is called. In general there is therefore no need to wrap the underlying {@link Writer} in a {@link BufferedWriter}. {@link WriterOutputStream} can
46   * also be instructed to flush the buffer after each write operation. In this case, all available data is written immediately to the underlying {@link Writer},
47   * implying that the current position of the {@link Writer} is correlated to the current position of the {@link WriterOutputStream}.
48   * </p>
49   * <p>
50   * {@link WriterOutputStream} implements the inverse transformation of {@link OutputStreamWriter}; in the following example, writing to {@code out2}
51   * would have the same result as writing to {@code out} directly (provided that the byte sequence is legal with respect to the charset encoding):
52   * </p>
53   * <p>
54   * To build an instance, use {@link Builder}.
55   * </p>
56   * <pre>
57   * OutputStream out = ...
58   * Charset cs = ...
59   * OutputStreamWriter writer = new OutputStreamWriter(out, cs);
60   * WriterOutputStream out2 = WriterOutputStream.builder()
61   *   .setWriter(writer)
62   *   .setCharset(cs)
63   *   .get();
64   * </pre>
65   * <p>
66   * {@link WriterOutputStream} implements the same transformation as {@link InputStreamReader}, except that the control flow is reversed: both classes
67   * transform a byte stream into a character stream, but {@link InputStreamReader} pulls data from the underlying stream, while
68   * {@link WriterOutputStream} pushes it to the underlying stream.
69   * </p>
70   * <p>
71   * Note that while there are use cases where there is no alternative to using this class, very often the need to use this class is an indication of a flaw in
72   * the design of the code. This class is typically used in situations where an existing API only accepts an {@link OutputStream} object, but where the stream is
73   * known to represent character data that must be decoded for further use.
74   * </p>
75   * <p>
76   * Instances of {@link WriterOutputStream} are not thread safe.
77   * </p>
78   *
79   * @see Builder
80   * @see org.apache.commons.io.input.ReaderInputStream
81   * @since 2.0
82   */
83  public class WriterOutputStream extends OutputStream {
84  
85      // @formatter:off
86      /**
87       * Builds a new {@link WriterOutputStream}.
88       *
89       * <p>
90       * For example:
91       * </p>
92       * <pre>{@code
93       * WriterOutputStream s = WriterOutputStream.builder()
94       *   .setPath(path)
95       *   .setBufferSize(8192)
96       *   .setCharset(StandardCharsets.UTF_8)
97       *   .setWriteImmediately(false)
98       *   .get();}
99       * </pre>
100      *
101      * @see #get()
102      * @since 2.12.0
103      */
104     // @formatter:on
105     public static class Builder extends AbstractStreamBuilder<WriterOutputStream, Builder> {
106 
107         private CharsetDecoder charsetDecoder;
108         private boolean writeImmediately;
109 
110         /**
111          * Constructs a new builder of {@link WriterOutputStream}.
112          */
113         public Builder() {
114             this.charsetDecoder = getCharset().newDecoder();
115         }
116 
117         /**
118          * Builds a new {@link WriterOutputStream}.
119          * <p>
120          * You must set an aspect that supports {@link #getWriter()} on this builder, otherwise, this method throws an exception.
121          * </p>
122          * <p>
123          * This builder uses the following aspects:
124          * </p>
125          * <ul>
126          * <li>{@link #getWriter()}</li>
127          * <li>{@link #getBufferSize()}</li>
128          * <li>charsetDecoder</li>
129          * <li>writeImmediately</li>
130          * </ul>
131          *
132          * @return a new instance.
133          * @throws UnsupportedOperationException if the origin cannot provide a {@link Writer}.
134          * @throws IOException                   if an I/O error occurs converting to an {@link Writer} using {@link #getWriter()}.
135          * @see #getWriter()
136          * @see #getUnchecked()
137          */
138         @Override
139         public WriterOutputStream get() throws IOException {
140             return new WriterOutputStream(getWriter(), charsetDecoder, getBufferSize(), writeImmediately);
141         }
142 
143         @Override
144         public Builder setCharset(final Charset charset) {
145             super.setCharset(charset);
146             this.charsetDecoder = getCharset().newDecoder();
147             return this;
148         }
149 
150         @Override
151         public Builder setCharset(final String charset) {
152             super.setCharset(charset);
153             this.charsetDecoder = getCharset().newDecoder();
154             return this;
155         }
156 
157         /**
158          * Sets the charset decoder.
159          *
160          * @param charsetDecoder the charset decoder.
161          * @return {@code this} instance.
162          */
163         public Builder setCharsetDecoder(final CharsetDecoder charsetDecoder) {
164             this.charsetDecoder = charsetDecoder != null ? charsetDecoder : getCharsetDefault().newDecoder();
165             super.setCharset(this.charsetDecoder.charset());
166             return this;
167         }
168 
169         /**
170          * Sets whether the output buffer will be flushed after each write operation ({@code true}), meaning all available data will be written to the
171          * underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when {@link #flush()} or
172          * {@link #close()} is called.
173          *
174          * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, meaning all available data will be written to
175          *                         the underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
176          *                         {@link #flush()} or {@link #close()} is called.
177          * @return {@code this} instance.
178          */
179         public Builder setWriteImmediately(final boolean writeImmediately) {
180             this.writeImmediately = writeImmediately;
181             return this;
182         }
183 
184     }
185 
186     private static final int BUFFER_SIZE = IOUtils.DEFAULT_BUFFER_SIZE;
187 
188     /**
189      * Constructs a new {@link Builder}.
190      *
191      * @return a new {@link Builder}.
192      * @since 2.12.0
193      */
194     public static Builder builder() {
195         return new Builder();
196     }
197 
198     /**
199      * Checks if the JDK in use properly supports the given charset.
200      *
201      * @param charset the charset to check the support for
202      */
203     private static void checkIbmJdkWithBrokenUTF16(final Charset charset) {
204         if (!StandardCharsets.UTF_16.name().equals(charset.name())) {
205             return;
206         }
207         final String TEST_STRING_2 = "v\u00e9s";
208         final byte[] bytes = TEST_STRING_2.getBytes(charset);
209 
210         final CharsetDecoder charsetDecoder2 = charset.newDecoder();
211         final ByteBuffer bb2 = ByteBuffer.allocate(16);
212         final CharBuffer cb2 = CharBuffer.allocate(TEST_STRING_2.length());
213         final int len = bytes.length;
214         for (int i = 0; i < len; i++) {
215             bb2.put(bytes[i]);
216             bb2.flip();
217             try {
218                 charsetDecoder2.decode(bb2, cb2, i == len - 1);
219             } catch (final IllegalArgumentException e) {
220                 throw new UnsupportedOperationException("UTF-16 requested when running on an IBM JDK with broken UTF-16 support. "
221                         + "Please find a JDK that supports UTF-16 if you intend to use UF-16 with WriterOutputStream");
222             }
223             bb2.compact();
224         }
225         cb2.rewind();
226         if (!TEST_STRING_2.equals(cb2.toString())) {
227             throw new UnsupportedOperationException("UTF-16 requested when running on an IBM JDK with broken UTF-16 support. "
228                     + "Please find a JDK that supports UTF-16 if you intend to use UF-16 with WriterOutputStream");
229         }
230 
231     }
232 
233     private final Writer writer;
234     private final CharsetDecoder decoder;
235 
236     private final boolean writeImmediately;
237 
238     /**
239      * ByteBuffer used as input for the decoder. This buffer can be small as it is used only to transfer the received data to the decoder.
240      */
241     private final ByteBuffer decoderIn = ByteBuffer.allocate(128);
242 
243     /**
244      * CharBuffer used as output for the decoder. It should be somewhat larger as we write from this buffer to the underlying Writer.
245      */
246     private final CharBuffer decoderOut;
247 
248     /**
249      * Constructs a new {@link WriterOutputStream} that uses the virtual machine's {@link Charset#defaultCharset() default charset} and with a default output
250      * buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed when it overflows or when {@link #flush()} or {@link #close()} is
251      * called.
252      *
253      * @param writer the target {@link Writer}
254      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
255      */
256     @Deprecated
257     public WriterOutputStream(final Writer writer) {
258         this(writer, Charset.defaultCharset(), BUFFER_SIZE, false);
259     }
260 
261     /**
262      * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed
263      * when it overflows or when {@link #flush()} or {@link #close()} is called.
264      *
265      * @param writer  the target {@link Writer}
266      * @param charset the charset encoding
267      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
268      */
269     @Deprecated
270     public WriterOutputStream(final Writer writer, final Charset charset) {
271         this(writer, charset, BUFFER_SIZE, false);
272     }
273 
274     /**
275      * Constructs a new {@link WriterOutputStream}.
276      *
277      * @param writer           the target {@link Writer}
278      * @param charset          the charset encoding
279      * @param bufferSize       the size of the output buffer in number of characters
280      * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, meaning all available data will be written to the
281      *                         underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
282      *                         {@link #flush()} or {@link #close()} is called.
283      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
284      */
285     @Deprecated
286     public WriterOutputStream(final Writer writer, final Charset charset, final int bufferSize, final boolean writeImmediately) {
287         // @formatter:off
288         this(writer,
289             Charsets.toCharset(charset).newDecoder()
290                     .onMalformedInput(CodingErrorAction.REPLACE)
291                     .onUnmappableCharacter(CodingErrorAction.REPLACE)
292                     .replaceWith("?"),
293              bufferSize,
294              writeImmediately);
295         // @formatter:on
296     }
297 
298     /**
299      * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed
300      * when it overflows or when {@link #flush()} or {@link #close()} is called.
301      *
302      * @param writer  the target {@link Writer}
303      * @param decoder the charset decoder
304      * @since 2.1
305      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
306      */
307     @Deprecated
308     public WriterOutputStream(final Writer writer, final CharsetDecoder decoder) {
309         this(writer, decoder, BUFFER_SIZE, false);
310     }
311 
312     /**
313      * Constructs a new {@link WriterOutputStream}.
314      *
315      * @param writer           the target {@link Writer}
316      * @param decoder          the charset decoder
317      * @param bufferSize       the size of the output buffer in number of characters
318      * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, meaning all available data will be written to the
319      *                         underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
320      *                         {@link #flush()} or {@link #close()} is called.
321      * @since 2.1
322      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
323      */
324     @Deprecated
325     public WriterOutputStream(final Writer writer, final CharsetDecoder decoder, final int bufferSize, final boolean writeImmediately) {
326         checkIbmJdkWithBrokenUTF16(CharsetDecoders.toCharsetDecoder(decoder).charset());
327         this.writer = writer;
328         this.decoder = CharsetDecoders.toCharsetDecoder(decoder);
329         this.writeImmediately = writeImmediately;
330         this.decoderOut = CharBuffer.allocate(bufferSize);
331     }
332 
333     /**
334      * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed
335      * when it overflows or when {@link #flush()} or {@link #close()} is called.
336      *
337      * @param writer      the target {@link Writer}
338      * @param charsetName the name of the charset encoding
339      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
340      */
341     @Deprecated
342     public WriterOutputStream(final Writer writer, final String charsetName) {
343         this(writer, charsetName, BUFFER_SIZE, false);
344     }
345 
346     /**
347      * Constructs a new {@link WriterOutputStream}.
348      *
349      * @param writer           the target {@link Writer}
350      * @param charsetName      the name of the charset encoding
351      * @param bufferSize       the size of the output buffer in number of characters
352      * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, meaning all available data will be written to the
353      *                         underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
354      *                         {@link #flush()} or {@link #close()} is called.
355      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
356      */
357     @Deprecated
358     public WriterOutputStream(final Writer writer, final String charsetName, final int bufferSize, final boolean writeImmediately) {
359         this(writer, Charsets.toCharset(charsetName), bufferSize, writeImmediately);
360     }
361 
362     /**
363      * Close the stream. Any remaining content accumulated in the output buffer will be written to the underlying {@link Writer}. After that
364      * {@link Writer#close()} will be called.
365      *
366      * @throws IOException if an I/O error occurs.
367      */
368     @Override
369     public void close() throws IOException {
370         processInput(true);
371         flushOutput();
372         writer.close();
373     }
374 
375     /**
376      * Flush the stream. Any remaining content accumulated in the output buffer will be written to the underlying {@link Writer}. After that
377      * {@link Writer#flush()} will be called.
378      *
379      * @throws IOException if an I/O error occurs.
380      */
381     @Override
382     public void flush() throws IOException {
383         flushOutput();
384         writer.flush();
385     }
386 
387     /**
388      * Flush the output.
389      *
390      * @throws IOException if an I/O error occurs.
391      */
392     private void flushOutput() throws IOException {
393         if (decoderOut.position() > 0) {
394             writer.write(decoderOut.array(), 0, decoderOut.position());
395             decoderOut.rewind();
396         }
397     }
398 
399     /**
400      * Decode the contents of the input ByteBuffer into a CharBuffer.
401      *
402      * @param endOfInput indicates end of input
403      * @throws IOException if an I/O error occurs.
404      */
405     private void processInput(final boolean endOfInput) throws IOException {
406         // Prepare decoderIn for reading
407         decoderIn.flip();
408         CoderResult coderResult;
409         while (true) {
410             coderResult = decoder.decode(decoderIn, decoderOut, endOfInput);
411             if (coderResult.isOverflow()) {
412                 flushOutput();
413             } else if (coderResult.isUnderflow()) {
414                 break;
415             } else {
416                 // The decoder is configured to replace malformed input and unmappable characters,
417                 // so we should not get here.
418                 throw new IOException("Unexpected coder result");
419             }
420         }
421         // Discard the bytes that have been read
422         decoderIn.compact();
423     }
424 
425     /**
426      * Write bytes from the specified byte array to the stream.
427      *
428      * @param b the byte array containing the bytes to write
429      * @throws IOException if an I/O error occurs.
430      */
431     @Override
432     public void write(final byte[] b) throws IOException {
433         write(b, 0, b.length);
434     }
435 
436     /**
437      * Write bytes from the specified byte array to the stream.
438      *
439      * @param b   the byte array containing the bytes to write
440      * @param off the start offset in the byte array
441      * @param len the number of bytes to write
442      * @throws IOException if an I/O error occurs.
443      */
444     @Override
445     public void write(final byte[] b, int off, int len) throws IOException {
446         while (len > 0) {
447             final int c = Math.min(len, decoderIn.remaining());
448             decoderIn.put(b, off, c);
449             processInput(false);
450             len -= c;
451             off += c;
452         }
453         if (writeImmediately) {
454             flushOutput();
455         }
456     }
457 
458     /**
459      * Write a single byte to the stream.
460      *
461      * @param b the byte to write
462      * @throws IOException if an I/O error occurs.
463      */
464     @Override
465     public void write(final int b) throws IOException {
466         write(new byte[] { (byte) b }, 0, 1);
467     }
468 }