View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.io.output;
18  
19  import java.io.IOException;
20  import java.io.OutputStream;
21  import java.io.Writer;
22  import java.nio.ByteBuffer;
23  import java.nio.CharBuffer;
24  import java.nio.charset.Charset;
25  import java.nio.charset.CharsetDecoder;
26  import java.nio.charset.CoderResult;
27  import java.nio.charset.CodingErrorAction;
28  import java.nio.charset.StandardCharsets;
29  
30  import org.apache.commons.io.Charsets;
31  import org.apache.commons.io.IOUtils;
32  import org.apache.commons.io.build.AbstractStreamBuilder;
33  import org.apache.commons.io.charset.CharsetDecoders;
34  
35  /**
36   * {@link OutputStream} implementation that transforms a byte stream to a character stream using a specified charset encoding and writes the resulting stream to
37   * a {@link Writer}. The stream is transformed using a {@link CharsetDecoder} object, guaranteeing that all charset encodings supported by the JRE are handled
38   * correctly.
39   * <p>
40   * The output of the {@link CharsetDecoder} is buffered using a fixed size buffer. This implies that the data is written to the underlying {@link Writer} in
41   * chunks that are no larger than the size of this buffer. By default, the buffer is flushed only when it overflows or when {@link #flush()} or {@link #close()}
42   * is called. In general there is therefore no need to wrap the underlying {@link Writer} in a {@link java.io.BufferedWriter}. {@link WriterOutputStream} can
43   * also be instructed to flush the buffer after each write operation. In this case, all available data is written immediately to the underlying {@link Writer},
44   * implying that the current position of the {@link Writer} is correlated to the current position of the {@link WriterOutputStream}.
45   * </p>
46   * <p>
47   * {@link WriterOutputStream} implements the inverse transformation of {@link java.io.OutputStreamWriter}; in the following example, writing to {@code out2}
48   * would have the same result as writing to {@code out} directly (provided that the byte sequence is legal with respect to the charset encoding):
49   * </p>
50   * <p>
51   * To build an instance, use {@link Builder}.
52   * </p>
53   * <pre>
54   * OutputStream out = ...
55   * Charset cs = ...
56   * OutputStreamWriter writer = new OutputStreamWriter(out, cs);
57   * WriterOutputStream out2 = WriterOutputStream.builder()
58   *   .setWriter(writer)
59   *   .setCharset(cs)
60   *   .get();
61   * </pre>
62   * <p>
63   * {@link WriterOutputStream} implements the same transformation as {@link java.io.InputStreamReader}, except that the control flow is reversed: both classes
64   * transform a byte stream into a character stream, but {@link java.io.InputStreamReader} pulls data from the underlying stream, while
65   * {@link WriterOutputStream} pushes it to the underlying stream.
66   * </p>
67   * <p>
68   * Note that while there are use cases where there is no alternative to using this class, very often the need to use this class is an indication of a flaw in
69   * the design of the code. This class is typically used in situations where an existing API only accepts an {@link OutputStream} object, but where the stream is
70   * known to represent character data that must be decoded for further use.
71   * </p>
72   * <p>
73   * Instances of {@link WriterOutputStream} are not thread safe.
74   * </p>
75   *
76   * @see Builder
77   * @see org.apache.commons.io.input.ReaderInputStream
78   * @since 2.0
79   */
80  public class WriterOutputStream extends OutputStream {
81  
82      // @formatter:off
83      /**
84       * Builds a new {@link WriterOutputStream}.
85       *
86       * <p>
87       * For example:
88       * </p>
89       * <pre>{@code
90       * WriterOutputStream s = WriterOutputStream.builder()
91       *   .setPath(path)
92       *   .setBufferSize(8192)
93       *   .setCharset(StandardCharsets.UTF_8)
94       *   .setWriteImmediately(false)
95       *   .get();}
96       * </pre>
97       *
98       * @see #get()
99       * @since 2.12.0
100      */
101     // @formatter:on
102     public static class Builder extends AbstractStreamBuilder<WriterOutputStream, Builder> {
103 
104         private CharsetDecoder charsetDecoder;
105         private boolean writeImmediately;
106 
107         /**
108          * Constructs a new Builder.
109          */
110         public Builder() {
111             this.charsetDecoder = getCharset().newDecoder();
112         }
113 
114         /**
115          * Builds a new {@link WriterOutputStream}.
116          * <p>
117          * You must set input that supports {@link #getWriter()} on this builder, otherwise, this method throws an exception.
118          * </p>
119          * <p>
120          * This builder use the following aspects:
121          * </p>
122          * <ul>
123          * <li>{@link #getWriter()}</li>
124          * <li>{@link #getBufferSize()}</li>
125          * <li>charsetDecoder</li>
126          * <li>writeImmediately</li>
127          * </ul>
128          *
129          * @return a new instance.
130          * @throws UnsupportedOperationException if the origin cannot provide a Writer.
131          * @see #getWriter()
132          */
133         @SuppressWarnings("resource")
134         @Override
135         public WriterOutputStream get() throws IOException {
136             return new WriterOutputStream(getWriter(), charsetDecoder, getBufferSize(), writeImmediately);
137         }
138 
139         @Override
140         public Builder setCharset(final Charset charset) {
141             super.setCharset(charset);
142             this.charsetDecoder = getCharset().newDecoder();
143             return this;
144         }
145 
146         @Override
147         public Builder setCharset(final String charset) {
148             super.setCharset(charset);
149             this.charsetDecoder = getCharset().newDecoder();
150             return this;
151         }
152 
153         /**
154          * Sets the charset decoder.
155          *
156          * @param charsetDecoder the charset decoder.
157          * @return this
158          */
159         public Builder setCharsetDecoder(final CharsetDecoder charsetDecoder) {
160             this.charsetDecoder = charsetDecoder != null ? charsetDecoder : getCharsetDefault().newDecoder();
161             super.setCharset(this.charsetDecoder.charset());
162             return this;
163         }
164 
165         /**
166          * Sets whether the output buffer will be flushed after each write operation ({@code true}), i.e. all available data will be written to the underlying
167          * {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when {@link #flush()} or {@link #close()}
168          * is called.
169          *
170          * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, i.e. all available data will be written to the
171          *                         underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
172          *                         {@link #flush()} or {@link #close()} is called.
173          * @return this
174          */
175         public Builder setWriteImmediately(final boolean writeImmediately) {
176             this.writeImmediately = writeImmediately;
177             return this;
178         }
179 
180     }
181 
182     private static final int BUFFER_SIZE = IOUtils.DEFAULT_BUFFER_SIZE;
183 
184     /**
185      * Constructs a new {@link Builder}.
186      *
187      * @return a new {@link Builder}.
188      * @since 2.12.0
189      */
190     public static Builder builder() {
191         return new Builder();
192     }
193 
194     /**
195      * Checks if the JDK in use properly supports the given charset.
196      *
197      * @param charset the charset to check the support for
198      */
199     private static void checkIbmJdkWithBrokenUTF16(final Charset charset) {
200         if (!StandardCharsets.UTF_16.name().equals(charset.name())) {
201             return;
202         }
203         final String TEST_STRING_2 = "v\u00e9s";
204         final byte[] bytes = TEST_STRING_2.getBytes(charset);
205 
206         final CharsetDecoder charsetDecoder2 = charset.newDecoder();
207         final ByteBuffer bb2 = ByteBuffer.allocate(16);
208         final CharBuffer cb2 = CharBuffer.allocate(TEST_STRING_2.length());
209         final int len = bytes.length;
210         for (int i = 0; i < len; i++) {
211             bb2.put(bytes[i]);
212             bb2.flip();
213             try {
214                 charsetDecoder2.decode(bb2, cb2, i == len - 1);
215             } catch (final IllegalArgumentException e) {
216                 throw new UnsupportedOperationException("UTF-16 requested when running on an IBM JDK with broken UTF-16 support. "
217                         + "Please find a JDK that supports UTF-16 if you intend to use UF-16 with WriterOutputStream");
218             }
219             bb2.compact();
220         }
221         cb2.rewind();
222         if (!TEST_STRING_2.equals(cb2.toString())) {
223             throw new UnsupportedOperationException("UTF-16 requested when running on an IBM JDK with broken UTF-16 support. "
224                     + "Please find a JDK that supports UTF-16 if you intend to use UF-16 with WriterOutputStream");
225         }
226 
227     }
228 
229     private final Writer writer;
230     private final CharsetDecoder decoder;
231 
232     private final boolean writeImmediately;
233 
234     /**
235      * ByteBuffer used as input for the decoder. This buffer can be small as it is used only to transfer the received data to the decoder.
236      */
237     private final ByteBuffer decoderIn = ByteBuffer.allocate(128);
238 
239     /**
240      * CharBuffer used as output for the decoder. It should be somewhat larger as we write from this buffer to the underlying Writer.
241      */
242     private final CharBuffer decoderOut;
243 
244     /**
245      * Constructs a new {@link WriterOutputStream} that uses the default character encoding and with a default output buffer size of {@value #BUFFER_SIZE}
246      * characters. The output buffer will only be flushed when it overflows or when {@link #flush()} or {@link #close()} is called.
247      *
248      * @param writer the target {@link Writer}
249      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
250      */
251     @Deprecated
252     public WriterOutputStream(final Writer writer) {
253         this(writer, Charset.defaultCharset(), BUFFER_SIZE, false);
254     }
255 
256     /**
257      * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed
258      * when it overflows or when {@link #flush()} or {@link #close()} is called.
259      *
260      * @param writer  the target {@link Writer}
261      * @param charset the charset encoding
262      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
263      */
264     @Deprecated
265     public WriterOutputStream(final Writer writer, final Charset charset) {
266         this(writer, charset, BUFFER_SIZE, false);
267     }
268 
269     /**
270      * Constructs a new {@link WriterOutputStream}.
271      *
272      * @param writer           the target {@link Writer}
273      * @param charset          the charset encoding
274      * @param bufferSize       the size of the output buffer in number of characters
275      * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, i.e. all available data will be written to the
276      *                         underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
277      *                         {@link #flush()} or {@link #close()} is called.
278      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
279      */
280     @Deprecated
281     public WriterOutputStream(final Writer writer, final Charset charset, final int bufferSize, final boolean writeImmediately) {
282         // @formatter:off
283         this(writer,
284             Charsets.toCharset(charset).newDecoder()
285                     .onMalformedInput(CodingErrorAction.REPLACE)
286                     .onUnmappableCharacter(CodingErrorAction.REPLACE)
287                     .replaceWith("?"),
288              bufferSize,
289              writeImmediately);
290         // @formatter:on
291     }
292 
293     /**
294      * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed
295      * when it overflows or when {@link #flush()} or {@link #close()} is called.
296      *
297      * @param writer  the target {@link Writer}
298      * @param decoder the charset decoder
299      * @since 2.1
300      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
301      */
302     @Deprecated
303     public WriterOutputStream(final Writer writer, final CharsetDecoder decoder) {
304         this(writer, decoder, BUFFER_SIZE, false);
305     }
306 
307     /**
308      * Constructs a new {@link WriterOutputStream}.
309      *
310      * @param writer           the target {@link Writer}
311      * @param decoder          the charset decoder
312      * @param bufferSize       the size of the output buffer in number of characters
313      * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, i.e. all available data will be written to the
314      *                         underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
315      *                         {@link #flush()} or {@link #close()} is called.
316      * @since 2.1
317      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
318      */
319     @Deprecated
320     public WriterOutputStream(final Writer writer, final CharsetDecoder decoder, final int bufferSize, final boolean writeImmediately) {
321         checkIbmJdkWithBrokenUTF16(CharsetDecoders.toCharsetDecoder(decoder).charset());
322         this.writer = writer;
323         this.decoder = CharsetDecoders.toCharsetDecoder(decoder);
324         this.writeImmediately = writeImmediately;
325         this.decoderOut = CharBuffer.allocate(bufferSize);
326     }
327 
328     /**
329      * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed
330      * when it overflows or when {@link #flush()} or {@link #close()} is called.
331      *
332      * @param writer      the target {@link Writer}
333      * @param charsetName the name of the charset encoding
334      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
335      */
336     @Deprecated
337     public WriterOutputStream(final Writer writer, final String charsetName) {
338         this(writer, charsetName, BUFFER_SIZE, false);
339     }
340 
341     /**
342      * Constructs a new {@link WriterOutputStream}.
343      *
344      * @param writer           the target {@link Writer}
345      * @param charsetName      the name of the charset encoding
346      * @param bufferSize       the size of the output buffer in number of characters
347      * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, i.e. all available data will be written to the
348      *                         underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
349      *                         {@link #flush()} or {@link #close()} is called.
350      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
351      */
352     @Deprecated
353     public WriterOutputStream(final Writer writer, final String charsetName, final int bufferSize, final boolean writeImmediately) {
354         this(writer, Charsets.toCharset(charsetName), bufferSize, writeImmediately);
355     }
356 
357     /**
358      * Close the stream. Any remaining content accumulated in the output buffer will be written to the underlying {@link Writer}. After that
359      * {@link Writer#close()} will be called.
360      *
361      * @throws IOException if an I/O error occurs.
362      */
363     @Override
364     public void close() throws IOException {
365         processInput(true);
366         flushOutput();
367         writer.close();
368     }
369 
370     /**
371      * Flush the stream. Any remaining content accumulated in the output buffer will be written to the underlying {@link Writer}. After that
372      * {@link Writer#flush()} will be called.
373      *
374      * @throws IOException if an I/O error occurs.
375      */
376     @Override
377     public void flush() throws IOException {
378         flushOutput();
379         writer.flush();
380     }
381 
382     /**
383      * Flush the output.
384      *
385      * @throws IOException if an I/O error occurs.
386      */
387     private void flushOutput() throws IOException {
388         if (decoderOut.position() > 0) {
389             writer.write(decoderOut.array(), 0, decoderOut.position());
390             decoderOut.rewind();
391         }
392     }
393 
394     /**
395      * Decode the contents of the input ByteBuffer into a CharBuffer.
396      *
397      * @param endOfInput indicates end of input
398      * @throws IOException if an I/O error occurs.
399      */
400     private void processInput(final boolean endOfInput) throws IOException {
401         // Prepare decoderIn for reading
402         decoderIn.flip();
403         CoderResult coderResult;
404         while (true) {
405             coderResult = decoder.decode(decoderIn, decoderOut, endOfInput);
406             if (coderResult.isOverflow()) {
407                 flushOutput();
408             } else if (coderResult.isUnderflow()) {
409                 break;
410             } else {
411                 // The decoder is configured to replace malformed input and unmappable characters,
412                 // so we should not get here.
413                 throw new IOException("Unexpected coder result");
414             }
415         }
416         // Discard the bytes that have been read
417         decoderIn.compact();
418     }
419 
420     /**
421      * Write bytes from the specified byte array to the stream.
422      *
423      * @param b the byte array containing the bytes to write
424      * @throws IOException if an I/O error occurs.
425      */
426     @Override
427     public void write(final byte[] b) throws IOException {
428         write(b, 0, b.length);
429     }
430 
431     /**
432      * Write bytes from the specified byte array to the stream.
433      *
434      * @param b   the byte array containing the bytes to write
435      * @param off the start offset in the byte array
436      * @param len the number of bytes to write
437      * @throws IOException if an I/O error occurs.
438      */
439     @Override
440     public void write(final byte[] b, int off, int len) throws IOException {
441         while (len > 0) {
442             final int c = Math.min(len, decoderIn.remaining());
443             decoderIn.put(b, off, c);
444             processInput(false);
445             len -= c;
446             off += c;
447         }
448         if (writeImmediately) {
449             flushOutput();
450         }
451     }
452 
453     /**
454      * Write a single byte to the stream.
455      *
456      * @param b the byte to write
457      * @throws IOException if an I/O error occurs.
458      */
459     @Override
460     public void write(final int b) throws IOException {
461         write(new byte[] { (byte) b }, 0, 1);
462     }
463 }