View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.io.output;
18  
19  import java.io.IOException;
20  import java.io.OutputStream;
21  import java.io.Writer;
22  import java.nio.ByteBuffer;
23  import java.nio.CharBuffer;
24  import java.nio.charset.Charset;
25  import java.nio.charset.CharsetDecoder;
26  import java.nio.charset.CoderResult;
27  import java.nio.charset.CodingErrorAction;
28  
29  /**
30   * {@link OutputStream} implementation that transforms a byte stream to a
31   * character stream using a specified charset encoding and writes the resulting
32   * stream to a {@link Writer}. The stream is transformed using a
33   * {@link CharsetDecoder} object, guaranteeing that all charset
34   * encodings supported by the JRE are handled correctly.
35   * <p>
36   * The output of the {@link CharsetDecoder} is buffered using a fixed size buffer.
37   * This implies that the data is written to the underlying {@link Writer} in chunks
38   * that are no larger than the size of this buffer. By default, the buffer is
39   * flushed only when it overflows or when {@link #flush()} or {@link #close()}
40   * is called. In general there is therefore no need to wrap the underlying {@link Writer}
41   * in a {@link java.io.BufferedWriter}. {@link WriterOutputStream} can also
42   * be instructed to flush the buffer after each write operation. In this case, all
43   * available data is written immediately to the underlying {@link Writer}, implying that
44   * the current position of the {@link Writer} is correlated to the current position
45   * of the {@link WriterOutputStream}.
46   * <p>
47   * {@link WriterOutputStream} implements the inverse transformation of {@link java.io.OutputStreamWriter};
48   * in the following example, writing to {@code out2} would have the same result as writing to
49   * {@code out} directly (provided that the byte sequence is legal with respect to the
50   * charset encoding):
51   * <pre>
52   * OutputStream out = ...
53   * Charset cs = ...
54   * OutputStreamWriter writer = new OutputStreamWriter(out, cs);
55   * WriterOutputStream out2 = new WriterOutputStream(writer, cs);</pre>
56   * {@link WriterOutputStream} implements the same transformation as {@link java.io.InputStreamReader},
57   * except that the control flow is reversed: both classes transform a byte stream
58   * into a character stream, but {@link java.io.InputStreamReader} pulls data from the underlying stream,
59   * while {@link WriterOutputStream} pushes it to the underlying stream.
60   * <p>
61   * Note that while there are use cases where there is no alternative to using
62   * this class, very often the need to use this class is an indication of a flaw
63   * in the design of the code. This class is typically used in situations where an existing
64   * API only accepts an {@link OutputStream} object, but where the stream is known to represent
65   * character data that must be decoded for further use.
66   * </p>
67   * <p>
68   * Instances of {@link WriterOutputStream} are not thread safe.
69   * </p>
70   *
71   * @see org.apache.commons.io.input.ReaderInputStream
72   * @since 2.0
73   */
public class WriterOutputStream extends OutputStream {
    /** Default capacity, in characters, of the decoder output buffer. */
    private static final int BUFFER_SIZE = 1024;

    /** Message reported when the running JDK cannot decode UTF-16 one byte at a time. */
    private static final String BROKEN_UTF16_MESSAGE =
            "UTF-16 requested when running on an IBM JDK with broken UTF-16 support. "
            + "Please find a JDK that supports UTF-16 if you intend to use UTF-16 with WriterOutputStream";

    /** Target that receives the decoded characters. */
    private final Writer writer;

    /** Decoder that turns the incoming bytes into characters. */
    private final CharsetDecoder decoder;

    /** Whether the output buffer is pushed to the writer at the end of every array write. */
    private final boolean writeImmediately;

    /**
     * ByteBuffer used as input for the decoder. This buffer can be small
     * as it is used only to transfer the received data to the
     * decoder.
     */
    private final ByteBuffer decoderIn = ByteBuffer.allocate(128);

    /**
     * CharBuffer used as output for the decoder. It should be
     * somewhat larger as we write from this buffer to the
     * underlying Writer.
     */
    private final CharBuffer decoderOut;

    /**
     * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE}
     * characters. The output buffer will only be flushed when it overflows or when {@link #flush()} or {@link #close()}
     * is called.
     *
     * @param writer the target {@link Writer}
     * @param decoder the charset decoder
     * @since 2.1
     */
    public WriterOutputStream(final Writer writer, final CharsetDecoder decoder) {
        this(writer, decoder, BUFFER_SIZE, false);
    }

    /**
     * Constructs a new {@link WriterOutputStream}.
     * <p>
     * The decoder is used exactly as configured by the caller. If it reports malformed input or
     * unmappable characters instead of replacing them, the write methods and {@link #close()} fail
     * with an {@link IOException} when such input is encountered.
     * </p>
     *
     * @param writer the target {@link Writer}
     * @param decoder the charset decoder
     * @param bufferSize the size of the output buffer in number of characters; a buffer that is too
     *                   small to hold a single decoded character (two {@code char}s for a
     *                   supplementary code point) makes the write fail with an {@link IOException}
     * @param writeImmediately If {@code true} the output buffer will be flushed after each
     *                         write operation, i.e. all available data will be written to the
     *                         underlying {@link Writer} immediately. If {@code false}, the
     *                         output buffer will only be flushed when it overflows or when
     *                         {@link #flush()} or {@link #close()} is called.
     * @throws IllegalArgumentException if {@code bufferSize} is negative
     * @throws UnsupportedOperationException if the decoder's charset is UTF-16 and the running JDK
     *                                       fails the incremental UTF-16 decoding self-test
     * @since 2.1
     */
    public WriterOutputStream(final Writer writer, final CharsetDecoder decoder, final int bufferSize,
                              final boolean writeImmediately) {
        checkIbmJdkWithBrokenUTF16(decoder.charset());
        this.writer = writer;
        this.decoder = decoder;
        this.writeImmediately = writeImmediately;
        decoderOut = CharBuffer.allocate(bufferSize);
    }

    /**
     * Constructs a new {@link WriterOutputStream}. Malformed input and unmappable characters are
     * replaced with {@code "?"}.
     *
     * @param writer the target {@link Writer}
     * @param charset the charset encoding
     * @param bufferSize the size of the output buffer in number of characters
     * @param writeImmediately If {@code true} the output buffer will be flushed after each
     *                         write operation, i.e. all available data will be written to the
     *                         underlying {@link Writer} immediately. If {@code false}, the
     *                         output buffer will only be flushed when it overflows or when
     *                         {@link #flush()} or {@link #close()} is called.
     */
    public WriterOutputStream(final Writer writer, final Charset charset, final int bufferSize,
                              final boolean writeImmediately) {
        this(writer,
             charset.newDecoder()
                    .onMalformedInput(CodingErrorAction.REPLACE)
                    .onUnmappableCharacter(CodingErrorAction.REPLACE)
                    .replaceWith("?"),
             bufferSize,
             writeImmediately);
    }

    /**
     * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE}
     * characters. The output buffer will only be flushed when it overflows or when {@link #flush()} or {@link #close()}
     * is called.
     *
     * @param writer the target {@link Writer}
     * @param charset the charset encoding
     */
    public WriterOutputStream(final Writer writer, final Charset charset) {
        this(writer, charset, BUFFER_SIZE, false);
    }

    /**
     * Constructs a new {@link WriterOutputStream}.
     *
     * @param writer the target {@link Writer}
     * @param charsetName the name of the charset encoding
     * @param bufferSize the size of the output buffer in number of characters
     * @param writeImmediately If {@code true} the output buffer will be flushed after each
     *                         write operation, i.e. all available data will be written to the
     *                         underlying {@link Writer} immediately. If {@code false}, the
     *                         output buffer will only be flushed when it overflows or when
     *                         {@link #flush()} or {@link #close()} is called.
     */
    public WriterOutputStream(final Writer writer, final String charsetName, final int bufferSize,
                              final boolean writeImmediately) {
        this(writer, Charset.forName(charsetName), bufferSize, writeImmediately);
    }

    /**
     * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE}
     * characters. The output buffer will only be flushed when it overflows or when {@link #flush()} or {@link #close()}
     * is called.
     *
     * @param writer the target {@link Writer}
     * @param charsetName the name of the charset encoding
     */
    public WriterOutputStream(final Writer writer, final String charsetName) {
        this(writer, charsetName, BUFFER_SIZE, false);
    }

    /**
     * Constructs a new {@link WriterOutputStream} that uses the default character encoding and with a default output
     * buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed when it overflows or when
     * {@link #flush()} or {@link #close()} is called.
     *
     * @param writer the target {@link Writer}
     * @deprecated 2.5 use {@link #WriterOutputStream(Writer, Charset)} instead
     */
    @Deprecated
    public WriterOutputStream(final Writer writer) {
        this(writer, Charset.defaultCharset(), BUFFER_SIZE, false);
    }

    /**
     * Write bytes from the specified byte array to the stream.
     *
     * @param b the byte array containing the bytes to write
     * @param off the start offset in the byte array
     * @param len the number of bytes to write
     * @throws IOException if an I/O error occurs.
     */
    @Override
    public void write(final byte[] b, int off, int len) throws IOException {
        // Feed the decoder in slices no larger than the free space of the small input buffer.
        while (len > 0) {
            final int c = Math.min(len, decoderIn.remaining());
            decoderIn.put(b, off, c);
            processInput(false);
            len -= c;
            off += c;
        }
        if (writeImmediately) {
            flushOutput();
        }
    }

    /**
     * Write bytes from the specified byte array to the stream.
     *
     * @param b the byte array containing the bytes to write
     * @throws IOException if an I/O error occurs.
     */
    @Override
    public void write(final byte[] b) throws IOException {
        write(b, 0, b.length);
    }

    /**
     * Write a single byte to the stream.
     *
     * @param b the byte to write
     * @throws IOException if an I/O error occurs.
     */
    @Override
    public void write(final int b) throws IOException {
        write(new byte[] {(byte) b}, 0, 1);
    }

    /**
     * Flush the stream. Any remaining content accumulated in the output buffer
     * will be written to the underlying {@link Writer}. After that
     * {@link Writer#flush()} will be called.
     * @throws IOException if an I/O error occurs.
     */
    @Override
    public void flush() throws IOException {
        flushOutput();
        writer.flush();
    }

    /**
     * Close the stream. Any remaining content accumulated in the output buffer
     * will be written to the underlying {@link Writer}. After that
     * {@link Writer#close()} will be called. The underlying {@link Writer} is closed
     * even if decoding or writing the remaining content fails.
     * @throws IOException if an I/O error occurs.
     */
    @Override
    public void close() throws IOException {
        try {
            processInput(true);
            flushOutput();
        } finally {
            // Always release the target, otherwise a failing final flush would leak it.
            writer.close();
        }
    }

    /**
     * Decode the contents of the input ByteBuffer into a CharBuffer.
     *
     * @param endOfInput indicates end of input
     * @throws IOException if an I/O error occurs, if the decoder reports malformed or unmappable
     *                     input, or if the output buffer is too small to hold the next decoded
     *                     character.
     */
    private void processInput(final boolean endOfInput) throws IOException {
        // Prepare decoderIn for reading
        decoderIn.flip();
        try {
            while (true) {
                final CoderResult coderResult = decoder.decode(decoderIn, decoderOut, endOfInput);
                if (coderResult.isUnderflow()) {
                    break;
                }
                if (coderResult.isOverflow()) {
                    if (decoderOut.position() == 0) {
                        // The buffer is already empty, so flushing cannot free any space:
                        // retrying would loop forever.
                        throw new IOException("Output buffer of " + decoderOut.capacity()
                                + " characters is too small to hold the next decoded character");
                    }
                    flushOutput();
                } else {
                    // Only reachable with a caller-supplied decoder that reports (rather than
                    // replaces) malformed input or unmappable characters.
                    throw new IOException("Unexpected coder result");
                }
            }
        } finally {
            // Discard the bytes that have been read and switch decoderIn back to write mode,
            // also on failure, so the buffer is never left in its flipped state.
            decoderIn.compact();
        }
    }

    /**
     * Flush the output.
     *
     * @throws IOException if an I/O error occurs.
     */
    private void flushOutput() throws IOException {
        if (decoderOut.position() > 0) {
            writer.write(decoderOut.array(), 0, decoderOut.position());
            // The limit is never changed, so resetting the position empties the buffer.
            decoderOut.rewind();
        }
    }

    /**
     * Check if the JDK in use properly supports the given charset.
     * <p>
     * Only the charset named {@code UTF-16} is checked: a short sample string is encoded and then
     * decoded again one byte at a time, mirroring how this class may feed its decoder.
     * </p>
     *
     * @param charset the charset to check the support for
     * @throws UnsupportedOperationException if the sample cannot be decoded incrementally or decodes
     *                                       to a different string
     */
    private static void checkIbmJdkWithBrokenUTF16(final Charset charset) {
        if (!"UTF-16".equals(charset.name())) {
            return;
        }
        final String sample = "v\u00e9s";
        final byte[] encoded = sample.getBytes(charset);

        final CharsetDecoder probe = charset.newDecoder();
        final ByteBuffer in = ByteBuffer.allocate(16);
        final CharBuffer out = CharBuffer.allocate(sample.length());
        final int len = encoded.length;
        for (int i = 0; i < len; i++) {
            in.put(encoded[i]);
            in.flip();
            try {
                probe.decode(in, out, i == (len - 1));
            } catch (final IllegalArgumentException e) {
                throw new UnsupportedOperationException(BROKEN_UTF16_MESSAGE, e);
            }
            in.compact();
        }
        out.rewind();
        if (!sample.equals(out.toString())) {
            throw new UnsupportedOperationException(BROKEN_UTF16_MESSAGE);
        }
    }
}
348 }