View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.io.output;
18  
19  import java.io.BufferedWriter;
20  import java.io.IOException;
21  import java.io.InputStreamReader;
22  import java.io.OutputStream;
23  import java.io.OutputStreamWriter;
24  import java.io.Writer;
25  import java.nio.ByteBuffer;
26  import java.nio.CharBuffer;
27  import java.nio.charset.Charset;
28  import java.nio.charset.CharsetDecoder;
29  import java.nio.charset.CoderResult;
30  import java.nio.charset.CodingErrorAction;
31  import java.nio.charset.StandardCharsets;
32  
33  import org.apache.commons.io.Charsets;
34  import org.apache.commons.io.IOUtils;
35  import org.apache.commons.io.build.AbstractStreamBuilder;
36  import org.apache.commons.io.charset.CharsetDecoders;
37  
38  /**
39   * {@link OutputStream} implementation that transforms a byte stream to a character stream using a specified charset encoding and writes the resulting stream to
40   * a {@link Writer}. The stream is transformed using a {@link CharsetDecoder} object, guaranteeing that all charset encodings supported by the JRE are handled
41   * correctly.
42   * <p>
43   * The output of the {@link CharsetDecoder} is buffered using a fixed size buffer. This implies that the data is written to the underlying {@link Writer} in
44   * chunks that are no larger than the size of this buffer. By default, the buffer is flushed only when it overflows or when {@link #flush()} or {@link #close()}
45   * is called. In general there is therefore no need to wrap the underlying {@link Writer} in a {@link BufferedWriter}. {@link WriterOutputStream} can
46   * also be instructed to flush the buffer after each write operation. In this case, all available data is written immediately to the underlying {@link Writer},
47   * implying that the current position of the {@link Writer} is correlated to the current position of the {@link WriterOutputStream}.
48   * </p>
49   * <p>
50   * {@link WriterOutputStream} implements the inverse transformation of {@link OutputStreamWriter}; in the following example, writing to {@code out2}
51   * would have the same result as writing to {@code out} directly (provided that the byte sequence is legal with respect to the charset encoding):
52   * </p>
53   * <p>
54   * To build an instance, use {@link Builder}.
55   * </p>
56   * <pre>
57   * OutputStream out = ...
58   * Charset cs = ...
59   * OutputStreamWriter writer = new OutputStreamWriter(out, cs);
60   * WriterOutputStream out2 = WriterOutputStream.builder()
61   *   .setWriter(writer)
62   *   .setCharset(cs)
63   *   .get();
64   * </pre>
65   * <p>
66   * {@link WriterOutputStream} implements the same transformation as {@link InputStreamReader}, except that the control flow is reversed: both classes
67   * transform a byte stream into a character stream, but {@link InputStreamReader} pulls data from the underlying stream, while
68   * {@link WriterOutputStream} pushes it to the underlying stream.
69   * </p>
70   * <p>
71   * Note that while there are use cases where there is no alternative to using this class, very often the need to use this class is an indication of a flaw in
72   * the design of the code. This class is typically used in situations where an existing API only accepts an {@link OutputStream} object, but where the stream is
73   * known to represent character data that must be decoded for further use.
74   * </p>
75   * <p>
76   * Instances of {@link WriterOutputStream} are not thread safe.
77   * </p>
78   *
79   * @see Builder
80   * @see org.apache.commons.io.input.ReaderInputStream
81   * @since 2.0
82   */
83  public class WriterOutputStream extends OutputStream {
84  
85      // @formatter:off
86      /**
87       * Builds a new {@link WriterOutputStream}.
88       *
89       * <p>
90       * For example:
91       * </p>
92       * <pre>{@code
93       * WriterOutputStream s = WriterOutputStream.builder()
94       *   .setPath(path)
95       *   .setBufferSize(8192)
96       *   .setCharset(StandardCharsets.UTF_8)
97       *   .setWriteImmediately(false)
98       *   .get();}
99       * </pre>
100      *
101      * @see #get()
102      * @since 2.12.0
103      */
104     // @formatter:on
105     public static class Builder extends AbstractStreamBuilder<WriterOutputStream, Builder> {
106 
107         private CharsetDecoder charsetDecoder;
108         private boolean writeImmediately;
109 
110         /**
111          * Constructs a new builder of {@link WriterOutputStream}.
112          */
113         public Builder() {
114             this.charsetDecoder = getCharset().newDecoder();
115         }
116 
117         /**
118          * Builds a new {@link WriterOutputStream}.
119          * <p>
120          * You must set an aspect that supports {@link #getWriter()} on this builder, otherwise, this method throws an exception.
121          * </p>
122          * <p>
123          * This builder uses the following aspects:
124          * </p>
125          * <ul>
126          * <li>{@link #getWriter()}</li>
127          * <li>{@link #getBufferSize()}</li>
128          * <li>charsetDecoder</li>
129          * <li>writeImmediately</li>
130          * </ul>
131          *
132          * @return a new instance.
133          * @throws UnsupportedOperationException if the origin cannot provide a {@link Writer}.
134          * @throws IOException                   if an I/O error occurs converting to an {@link Writer} using {@link #getWriter()}.
135          * @see #getWriter()
136          * @see #getUnchecked()
137          */
138         @Override
139         public WriterOutputStream get() throws IOException {
140             return new WriterOutputStream(this);
141         }
142 
143         @Override
144         public Builder setCharset(final Charset charset) {
145             super.setCharset(charset);
146             this.charsetDecoder = newDecoder(getCharset());
147             return this;
148         }
149 
150         @Override
151         public Builder setCharset(final String charset) {
152             super.setCharset(charset);
153             this.charsetDecoder = newDecoder(getCharset());
154             return this;
155         }
156 
157         /**
158          * Sets the charset decoder.
159          *
160          * @param charsetDecoder the charset decoder.
161          * @return {@code this} instance.
162          */
163         public Builder setCharsetDecoder(final CharsetDecoder charsetDecoder) {
164             this.charsetDecoder = charsetDecoder != null ? charsetDecoder : getCharsetDefault().newDecoder();
165             super.setCharset(this.charsetDecoder.charset());
166             return this;
167         }
168 
169         /**
170          * Sets whether the output buffer will be flushed after each write operation ({@code true}), meaning all available data will be written to the
171          * underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when {@link #flush()} or
172          * {@link #close()} is called.
173          *
174          * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, meaning all available data will be written to
175          *                         the underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
176          *                         {@link #flush()} or {@link #close()} is called.
177          * @return {@code this} instance.
178          */
179         public Builder setWriteImmediately(final boolean writeImmediately) {
180             this.writeImmediately = writeImmediately;
181             return this;
182         }
183 
184     }
185 
186     private static final int BUFFER_SIZE = IOUtils.DEFAULT_BUFFER_SIZE;
187 
188     /**
189      * Constructs a new {@link Builder}.
190      *
191      * @return a new {@link Builder}.
192      * @since 2.12.0
193      */
194     public static Builder builder() {
195         return new Builder();
196     }
197 
198     /**
199      * Checks if the JDK in use properly supports the given charset.
200      *
201      * @param charset the charset to check the support for.
202      */
203     private static void checkIbmJdkWithBrokenUTF16(final Charset charset) {
204         if (!StandardCharsets.UTF_16.name().equals(charset.name())) {
205             return;
206         }
207         final String testString = "v\u00e9s";
208         final byte[] bytes = testString.getBytes(charset);
209 
210         final CharsetDecoder charsetDecoder2 = charset.newDecoder();
211         final ByteBuffer bb2 = ByteBuffer.allocate(16);
212         final CharBuffer cb2 = CharBuffer.allocate(testString.length());
213         final int len = bytes.length;
214         for (int i = 0; i < len; i++) {
215             bb2.put(bytes[i]);
216             bb2.flip();
217             try {
218                 charsetDecoder2.decode(bb2, cb2, i == len - 1);
219             } catch (final IllegalArgumentException e) {
220                 throw new UnsupportedOperationException("UTF-16 requested when running on an IBM JDK with broken UTF-16 support. "
221                         + "Please find a JDK that supports UTF-16 if you intend to use UF-16 with WriterOutputStream");
222             }
223             bb2.compact();
224         }
225         cb2.rewind();
226         if (!testString.equals(cb2.toString())) {
227             throw new UnsupportedOperationException("UTF-16 requested when running on an IBM JDK with broken UTF-16 support. "
228                     + "Please find a JDK that supports UTF-16 if you intend to use UF-16 with WriterOutputStream");
229         }
230 
231     }
232 
233     private static CharsetDecoder newDecoder(final Charset charset) {
234         // @formatter:off
235         return Charsets.toCharset(charset).newDecoder()
236             .onMalformedInput(CodingErrorAction.REPLACE)
237             .onUnmappableCharacter(CodingErrorAction.REPLACE)
238             .replaceWith("?");
239         // @formatter:on
240     }
241 
242     private final Writer writer;
243 
244     private final CharsetDecoder decoder;
245 
246     private final boolean writeImmediately;
247 
248     /**
249      * ByteBuffer used as input for the decoder. This buffer can be small as it is used only to transfer the received data to the decoder.
250      */
251     private final ByteBuffer decoderIn = ByteBuffer.allocate(128);
252 
253     /**
254      * CharBuffer used as output for the decoder. It should be somewhat larger as we write from this buffer to the underlying Writer.
255      */
256     private final CharBuffer decoderOut;
257 
258     @SuppressWarnings("resource") // caller closes.
259     private WriterOutputStream(final Builder builder) throws IOException {
260         this(builder.getWriter(), builder.charsetDecoder, builder.getBufferSize(), builder.writeImmediately);
261     }
262 
263     /**
264      * Constructs a new {@link WriterOutputStream} that uses the virtual machine's {@linkplain Charset#defaultCharset() default charset} and with a default
265      * output buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed when it overflows or when {@link #flush()} or
266      * {@link #close()} is called.
267      *
268      * @param writer the target {@link Writer}.
269      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}.
270      */
271     @Deprecated
272     public WriterOutputStream(final Writer writer) {
273         this(writer, Charset.defaultCharset(), BUFFER_SIZE, false);
274     }
275 
276     /**
277      * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed
278      * when it overflows or when {@link #flush()} or {@link #close()} is called.
279      *
280      * @param writer  the target {@link Writer}.
281      * @param charset the charset encoding.
282      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}.
283      */
284     @Deprecated
285     public WriterOutputStream(final Writer writer, final Charset charset) {
286         this(writer, charset, BUFFER_SIZE, false);
287     }
288 
289     /**
290      * Constructs a new {@link WriterOutputStream}.
291      *
292      * @param writer           the target {@link Writer}.
293      * @param charset          the charset encoding.
294      * @param bufferSize       the size of the output buffer in number of characters.
295      * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, meaning all available data will be written to the
296      *                         underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
297      *                         {@link #flush()} or {@link #close()} is called.
298      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}.
299      */
300     @Deprecated
301     public WriterOutputStream(final Writer writer, final Charset charset, final int bufferSize, final boolean writeImmediately) {
302         this(writer, newDecoder(charset), bufferSize, writeImmediately);
303     }
304 
305     /**
306      * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed
307      * when it overflows or when {@link #flush()} or {@link #close()} is called.
308      *
309      * @param writer  the target {@link Writer}.
310      * @param decoder the charset decoder.
311      * @since 2.1
312      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}.
313      */
314     @Deprecated
315     public WriterOutputStream(final Writer writer, final CharsetDecoder decoder) {
316         this(writer, decoder, BUFFER_SIZE, false);
317     }
318 
319     /**
320      * Constructs a new {@link WriterOutputStream}.
321      *
322      * @param writer           the target {@link Writer}.
323      * @param decoder          the charset decoder.
324      * @param bufferSize       the size of the output buffer in number of characters.
325      * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, meaning all available data will be written to the
326      *                         underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
327      *                         {@link #flush()} or {@link #close()} is called.
328      * @since 2.1
329      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}.
330      */
331     @Deprecated
332     public WriterOutputStream(final Writer writer, final CharsetDecoder decoder, final int bufferSize, final boolean writeImmediately) {
333         checkIbmJdkWithBrokenUTF16(CharsetDecoders.toCharsetDecoder(decoder).charset());
334         this.writer = writer;
335         this.decoder = CharsetDecoders.toCharsetDecoder(decoder);
336         this.writeImmediately = writeImmediately;
337         this.decoderOut = CharBuffer.allocate(bufferSize);
338     }
339 
340     /**
341      * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed
342      * when it overflows or when {@link #flush()} or {@link #close()} is called.
343      *
344      * @param writer      the target {@link Writer}.
345      * @param charsetName the name of the charset encoding.
346      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}.
347      */
348     @Deprecated
349     public WriterOutputStream(final Writer writer, final String charsetName) {
350         this(writer, charsetName, BUFFER_SIZE, false);
351     }
352 
353     /**
354      * Constructs a new {@link WriterOutputStream}.
355      *
356      * @param writer           the target {@link Writer}.
357      * @param charsetName      the name of the charset encoding.
358      * @param bufferSize       the size of the output buffer in number of characters.
359      * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, meaning all available data will be written to the
360      *                         underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
361      *                         {@link #flush()} or {@link #close()} is called.
362      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}.
363      */
364     @Deprecated
365     public WriterOutputStream(final Writer writer, final String charsetName, final int bufferSize, final boolean writeImmediately) {
366         this(writer, Charsets.toCharset(charsetName), bufferSize, writeImmediately);
367     }
368 
369     /**
370      * Close the stream. Any remaining content accumulated in the output buffer will be written to the underlying {@link Writer}. After that
371      * {@link Writer#close()} will be called.
372      *
373      * @throws IOException if an I/O error occurs.
374      */
375     @Override
376     public void close() throws IOException {
377         processInput(true);
378         flushOutput();
379         writer.close();
380     }
381 
382     /**
383      * Flush the stream. Any remaining content accumulated in the output buffer will be written to the underlying {@link Writer}. After that
384      * {@link Writer#flush()} will be called.
385      *
386      * @throws IOException if an I/O error occurs.
387      */
388     @Override
389     public void flush() throws IOException {
390         flushOutput();
391         writer.flush();
392     }
393 
394     /**
395      * Flush the output.
396      *
397      * @throws IOException if an I/O error occurs.
398      */
399     private void flushOutput() throws IOException {
400         if (decoderOut.position() > 0) {
401             writer.write(decoderOut.array(), 0, decoderOut.position());
402             decoderOut.rewind();
403         }
404     }
405 
406     /**
407      * Decode the contents of the input ByteBuffer into a CharBuffer.
408      *
409      * @param endOfInput indicates end of input.
410      * @throws IOException if an I/O error occurs.
411      */
412     private void processInput(final boolean endOfInput) throws IOException {
413         // Prepare decoderIn for reading
414         decoderIn.flip();
415         CoderResult coderResult;
416         while (true) {
417             coderResult = decoder.decode(decoderIn, decoderOut, endOfInput);
418             if (coderResult.isOverflow()) {
419                 flushOutput();
420             } else if (coderResult.isUnderflow()) {
421                 break;
422             } else {
423                 // The decoder is configured to replace malformed input and unmappable characters,
424                 // so we should not get here.
425                 throw new IOException("Unexpected coder result");
426             }
427         }
428         // Discard the bytes that have been read
429         decoderIn.compact();
430     }
431 
432     /**
433      * Writes bytes from the specified byte array to the stream.
434      *
435      * @param b the byte array containing the bytes to write.
436      * @throws NullPointerException if the byte array is {@code null}.
437      * @throws IOException if an I/O error occurs.
438      */
439     @Override
440     public void write(final byte[] b) throws IOException {
441         write(b, 0, b.length);
442     }
443 
444     /**
445      * Writes bytes from the specified byte array to the stream.
446      *
447      * @param b   the byte array containing the bytes to write.
448      * @param off the start offset in the byte array.
449      * @param len the number of bytes to write.
450      * @throws NullPointerException      if the byte array is {@code null}.
451      * @throws IndexOutOfBoundsException if {@code off} or {@code len} are negative, or if {@code off + len} is greater than {@code b.length}.
452      * @throws IOException if an I/O error occurs.
453      */
454     @Override
455     public void write(final byte[] b, int off, int len) throws IOException {
456         IOUtils.checkFromIndexSize(b, off, len);
457         while (len > 0) {
458             final int c = Math.min(len, decoderIn.remaining());
459             decoderIn.put(b, off, c);
460             processInput(false);
461             len -= c;
462             off += c;
463         }
464         if (writeImmediately) {
465             flushOutput();
466         }
467     }
468 
469     /**
470      * Writes a single byte to the stream.
471      *
472      * @param b the byte to write.
473      * @throws IOException if an I/O error occurs.
474      */
475     @Override
476     public void write(final int b) throws IOException {
477         write(new byte[] { (byte) b }, 0, 1);
478     }
479 }