/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17  package org.apache.commons.io.output;
18  
19  import java.io.IOException;
20  import java.io.OutputStream;
21  import java.io.Writer;
22  import java.nio.ByteBuffer;
23  import java.nio.CharBuffer;
24  import java.nio.charset.Charset;
25  import java.nio.charset.CharsetDecoder;
26  import java.nio.charset.CoderResult;
27  import java.nio.charset.CodingErrorAction;
28  
/**
 * {@link OutputStream} implementation that transforms a byte stream to a
 * character stream using a specified charset encoding and writes the resulting
 * stream to a {@link Writer}. The stream is transformed using a
 * {@link CharsetDecoder} object, guaranteeing that all charset
 * encodings supported by the JRE are handled correctly.
 * <p>
 * The output of the {@link CharsetDecoder} is buffered using a fixed size buffer.
 * This implies that the data is written to the underlying {@link Writer} in chunks
 * that are no larger than the size of this buffer. By default, the buffer is
 * flushed only when it overflows or when {@link #flush()} or {@link #close()}
 * is called. In general there is therefore no need to wrap the underlying {@link Writer}
 * in a {@link java.io.BufferedWriter}. {@link WriterOutputStream} can also
 * be instructed to flush the buffer after each write operation. In this case, all
 * available data is written immediately to the underlying {@link Writer}, implying that
 * the current position of the {@link Writer} is correlated to the current position
 * of the {@link WriterOutputStream}.
 * <p>
 * The output buffer must be large enough to hold at least one decoded character
 * (two {@code char}s for characters outside the Basic Multilingual Plane). If it is
 * not, the write operation that hits the limit fails with an {@link IOException}.
 * <p>
 * {@link WriterOutputStream} implements the inverse transformation of {@link java.io.OutputStreamWriter};
 * in the following example, writing to {@code out2} would have the same result as writing to
 * {@code out} directly (provided that the byte sequence is legal with respect to the
 * charset encoding):
 * <pre>
 * OutputStream out = ...
 * Charset cs = ...
 * OutputStreamWriter writer = new OutputStreamWriter(out, cs);
 * WriterOutputStream out2 = new WriterOutputStream(writer, cs);</pre>
 * {@link WriterOutputStream} implements the same transformation as {@link java.io.InputStreamReader},
 * except that the control flow is reversed: both classes transform a byte stream
 * into a character stream, but {@link java.io.InputStreamReader} pulls data from the underlying stream,
 * while {@link WriterOutputStream} pushes it to the underlying stream.
 * <p>
 * Note that while there are use cases where there is no alternative to using
 * this class, very often the need to use this class is an indication of a flaw
 * in the design of the code. This class is typically used in situations where an existing
 * API only accepts an {@link OutputStream} object, but where the stream is known to represent
 * character data that must be decoded for further use.
 * <p>
 * Instances of {@link WriterOutputStream} are not thread safe.
 *
 * @see org.apache.commons.io.input.ReaderInputStream
 *
 * @since 2.0
 */
public class WriterOutputStream extends OutputStream {
    private static final int DEFAULT_BUFFER_SIZE = 1024;

    /** Message used when the running JDK fails the UTF-16 incremental decoding self-test. */
    private static final String BROKEN_UTF16_MESSAGE =
            "UTF-16 requested when running on an IBM JDK with broken UTF-16 support. "
            + "Please find a JDK that supports UTF-16 if you intend to use UTF-16 with WriterOutputStream";

    private final Writer writer;
    private final CharsetDecoder decoder;
    private final boolean writeImmediately;

    /**
     * ByteBuffer used as input for the decoder. This buffer can be small
     * as it is used only to transfer the received data to the
     * decoder.
     */
    private final ByteBuffer decoderIn = ByteBuffer.allocate(128);

    /**
     * CharBuffer used as output for the decoder. It should be
     * somewhat larger as we write from this buffer to the
     * underlying Writer.
     */
    private final CharBuffer decoderOut;

    /**
     * Constructs a new {@link WriterOutputStream} with a default output buffer size of
     * 1024 characters. The output buffer will only be flushed when it overflows or when
     * {@link #flush()} or {@link #close()} is called.
     *
     * @param writer the target {@link Writer}
     * @param decoder the charset decoder
     * @since 2.1
     */
    public WriterOutputStream(final Writer writer, final CharsetDecoder decoder) {
        this(writer, decoder, DEFAULT_BUFFER_SIZE, false);
    }

    /**
     * Constructs a new {@link WriterOutputStream}.
     *
     * @param writer the target {@link Writer}
     * @param decoder the charset decoder; must not be {@code null}
     * @param bufferSize the size of the output buffer in number of characters
     * @param writeImmediately If {@code true} the output buffer will be flushed after each
     *                         write operation, i.e. all available data will be written to the
     *                         underlying {@link Writer} immediately. If {@code false}, the
     *                         output buffer will only be flushed when it overflows or when
     *                         {@link #flush()} or {@link #close()} is called.
     * @throws UnsupportedOperationException if the decoder's charset is UTF-16 and the running
     *                                       JDK fails the incremental UTF-16 decoding self-test
     * @throws IllegalArgumentException if {@code bufferSize} is negative
     * @since 2.1
     */
    public WriterOutputStream(final Writer writer, final CharsetDecoder decoder, final int bufferSize,
                              final boolean writeImmediately) {
        // Fail fast before any state is set up if this JDK cannot decode UTF-16 byte by byte.
        checkIbmJdkWithBrokenUTF16(decoder.charset());
        this.writer = writer;
        this.decoder = decoder;
        this.writeImmediately = writeImmediately;
        decoderOut = CharBuffer.allocate(bufferSize);
    }

    /**
     * Constructs a new {@link WriterOutputStream}. Malformed input and unmappable
     * characters are replaced with {@code "?"}.
     *
     * @param writer the target {@link Writer}
     * @param charset the charset encoding
     * @param bufferSize the size of the output buffer in number of characters
     * @param writeImmediately If {@code true} the output buffer will be flushed after each
     *                         write operation, i.e. all available data will be written to the
     *                         underlying {@link Writer} immediately. If {@code false}, the
     *                         output buffer will only be flushed when it overflows or when
     *                         {@link #flush()} or {@link #close()} is called.
     */
    public WriterOutputStream(final Writer writer, final Charset charset, final int bufferSize,
                              final boolean writeImmediately) {
        this(writer,
             charset.newDecoder()
                    .onMalformedInput(CodingErrorAction.REPLACE)
                    .onUnmappableCharacter(CodingErrorAction.REPLACE)
                    .replaceWith("?"),
             bufferSize,
             writeImmediately);
    }

    /**
     * Constructs a new {@link WriterOutputStream} with a default output buffer size of
     * 1024 characters. The output buffer will only be flushed when it overflows or when
     * {@link #flush()} or {@link #close()} is called.
     *
     * @param writer the target {@link Writer}
     * @param charset the charset encoding
     */
    public WriterOutputStream(final Writer writer, final Charset charset) {
        this(writer, charset, DEFAULT_BUFFER_SIZE, false);
    }

    /**
     * Constructs a new {@link WriterOutputStream}.
     *
     * @param writer the target {@link Writer}
     * @param charsetName the name of the charset encoding
     * @param bufferSize the size of the output buffer in number of characters
     * @param writeImmediately If {@code true} the output buffer will be flushed after each
     *                         write operation, i.e. all available data will be written to the
     *                         underlying {@link Writer} immediately. If {@code false}, the
     *                         output buffer will only be flushed when it overflows or when
     *                         {@link #flush()} or {@link #close()} is called.
     */
    public WriterOutputStream(final Writer writer, final String charsetName, final int bufferSize,
                              final boolean writeImmediately) {
        this(writer, Charset.forName(charsetName), bufferSize, writeImmediately);
    }

    /**
     * Constructs a new {@link WriterOutputStream} with a default output buffer size of
     * 1024 characters. The output buffer will only be flushed when it overflows or when
     * {@link #flush()} or {@link #close()} is called.
     *
     * @param writer the target {@link Writer}
     * @param charsetName the name of the charset encoding
     */
    public WriterOutputStream(final Writer writer, final String charsetName) {
        this(writer, charsetName, DEFAULT_BUFFER_SIZE, false);
    }

    /**
     * Constructs a new {@link WriterOutputStream} that uses the default character encoding
     * and with a default output buffer size of 1024 characters. The output buffer will only
     * be flushed when it overflows or when {@link #flush()} or {@link #close()} is called.
     *
     * @param writer the target {@link Writer}
     * @deprecated 2.5 use {@link #WriterOutputStream(Writer, Charset)} instead
     */
    @Deprecated
    public WriterOutputStream(final Writer writer) {
        this(writer, Charset.defaultCharset(), DEFAULT_BUFFER_SIZE, false);
    }

    /**
     * Write bytes from the specified byte array to the stream.
     *
     * @param b the byte array containing the bytes to write
     * @param off the start offset in the byte array
     * @param len the number of bytes to write
     * @throws IOException if an I/O error occurs
     */
    @Override
    public void write(final byte[] b, int off, int len) throws IOException {
        // Feed the input to the decoder in chunks that fit into the small input buffer.
        while (len > 0) {
            final int c = Math.min(len, decoderIn.remaining());
            decoderIn.put(b, off, c);
            processInput(false);
            len -= c;
            off += c;
        }
        if (writeImmediately) {
            flushOutput();
        }
    }

    /**
     * Write bytes from the specified byte array to the stream.
     *
     * @param b the byte array containing the bytes to write
     * @throws IOException if an I/O error occurs
     */
    @Override
    public void write(final byte[] b) throws IOException {
        write(b, 0, b.length);
    }

    /**
     * Write a single byte to the stream.
     *
     * @param b the byte to write
     * @throws IOException if an I/O error occurs
     */
    @Override
    public void write(final int b) throws IOException {
        write(new byte[] { (byte) b }, 0, 1);
    }

    /**
     * Flush the stream. Any remaining content accumulated in the output buffer
     * will be written to the underlying {@link Writer}. After that
     * {@link Writer#flush()} will be called.
     *
     * @throws IOException if an I/O error occurs
     */
    @Override
    public void flush() throws IOException {
        flushOutput();
        writer.flush();
    }

    /**
     * Close the stream. Any remaining content accumulated in the output buffer
     * will be written to the underlying {@link Writer}. After that
     * {@link Writer#close()} will be called.
     *
     * @throws IOException if an I/O error occurs
     */
    @Override
    public void close() throws IOException {
        // Signal end of input so that a trailing incomplete byte sequence is handled by the decoder.
        processInput(true);
        flushOutput();
        writer.close();
    }

    /**
     * Decode the contents of the input ByteBuffer into a CharBuffer.
     * <p>
     * Whatever happens, the input buffer is switched back to "write" mode with the
     * not yet decoded bytes preserved, so that a failure of the underlying
     * {@link Writer} does not cause already decoded bytes to be decoded again by
     * a later call.
     *
     * @param endOfInput indicates end of input
     * @throws IOException if an I/O error occurs, if the decoder reports an error, or if
     *                     the output buffer is too small to hold a single decoded character
     */
    private void processInput(final boolean endOfInput) throws IOException {
        // Prepare decoderIn for reading
        decoderIn.flip();
        try {
            while (true) {
                final CoderResult coderResult = decoder.decode(decoderIn, decoderOut, endOfInput);
                if (coderResult.isOverflow()) {
                    if (decoderOut.position() == 0) {
                        // The buffer is already empty, so flushing cannot free any space:
                        // without this check the loop would never terminate.
                        throw new IOException("Output buffer of " + decoderOut.capacity()
                                + " characters is too small to hold a decoded character");
                    }
                    flushOutput();
                } else if (coderResult.isUnderflow()) {
                    break;
                } else {
                    // The decoder is configured to replace malformed input and unmappable characters,
                    // so we should not get here.
                    throw new IOException("Unexpected coder result");
                }
            }
        } finally {
            // Discard the bytes that have been read and return to "write" mode,
            // even when decoding or flushing failed.
            decoderIn.compact();
        }
    }

    /**
     * Flush the output: writes the decoded characters accumulated so far to the
     * underlying {@link Writer} and empties the output buffer. The writer itself
     * is not flushed.
     *
     * @throws IOException if an I/O error occurs
     */
    private void flushOutput() throws IOException {
        if (decoderOut.position() > 0) {
            writer.write(decoderOut.array(), 0, decoderOut.position());
            decoderOut.rewind();
        }
    }

    /**
     * Checks that the running JDK can decode UTF-16 when the input is supplied one
     * byte at a time, which is how this class may feed its decoder. The check is
     * skipped for every charset other than UTF-16.
     *
     * @param charset the charset requested for decoding
     * @throws UnsupportedOperationException if the incremental decoding self-test fails
     */
    private static void checkIbmJdkWithBrokenUTF16(final Charset charset) {
        if (!"UTF-16".equals(charset.name())) {
            return;
        }
        final String testString = "v\u00e9s";
        final byte[] bytes = testString.getBytes(charset);

        final CharsetDecoder probeDecoder = charset.newDecoder();
        final ByteBuffer in = ByteBuffer.allocate(16);
        final CharBuffer out = CharBuffer.allocate(testString.length());
        final int len = bytes.length;
        for (int i = 0; i < len; i++) {
            in.put(bytes[i]);
            in.flip();
            try {
                probeDecoder.decode(in, out, i == (len - 1));
            } catch (final IllegalArgumentException e) {
                // Keep the original failure as the cause to aid diagnosis.
                throw new UnsupportedOperationException(BROKEN_UTF16_MESSAGE, e);
            }
            in.compact();
        }
        // Compare the full buffer content: a short or wrong decode result fails the check.
        out.rewind();
        if (!testString.equals(out.toString())) {
            throw new UnsupportedOperationException(BROKEN_UTF16_MESSAGE);
        }
    }
}
340 }