001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.io.output;
018
019import java.io.IOException;
020import java.io.OutputStream;
021import java.io.Writer;
022import java.nio.ByteBuffer;
023import java.nio.CharBuffer;
024import java.nio.charset.Charset;
025import java.nio.charset.CharsetDecoder;
026import java.nio.charset.CoderResult;
027import java.nio.charset.CodingErrorAction;
028import java.nio.charset.StandardCharsets;
029
030import org.apache.commons.io.Charsets;
031import org.apache.commons.io.IOUtils;
032import org.apache.commons.io.build.AbstractStreamBuilder;
033import org.apache.commons.io.charset.CharsetDecoders;
034
035/**
036 * {@link OutputStream} implementation that transforms a byte stream to a character stream using a specified charset encoding and writes the resulting stream to
037 * a {@link Writer}. The stream is transformed using a {@link CharsetDecoder} object, guaranteeing that all charset encodings supported by the JRE are handled
038 * correctly.
039 * <p>
040 * The output of the {@link CharsetDecoder} is buffered using a fixed size buffer. This implies that the data is written to the underlying {@link Writer} in
041 * chunks that are no larger than the size of this buffer. By default, the buffer is flushed only when it overflows or when {@link #flush()} or {@link #close()}
042 * is called. In general there is therefore no need to wrap the underlying {@link Writer} in a {@link java.io.BufferedWriter}. {@link WriterOutputStream} can
043 * also be instructed to flush the buffer after each write operation. In this case, all available data is written immediately to the underlying {@link Writer},
044 * implying that the current position of the {@link Writer} is correlated to the current position of the {@link WriterOutputStream}.
045 * </p>
046 * <p>
047 * {@link WriterOutputStream} implements the inverse transformation of {@link java.io.OutputStreamWriter}; in the following example, writing to {@code out2}
048 * would have the same result as writing to {@code out} directly (provided that the byte sequence is legal with respect to the charset encoding):
049 * </p>
050 * <p>
051 * To build an instance, use {@link Builder}.
052 * </p>
053 * <pre>
054 * OutputStream out = ...
055 * Charset cs = ...
056 * OutputStreamWriter writer = new OutputStreamWriter(out, cs);
057 * WriterOutputStream out2 = WriterOutputStream.builder()
058 *   .setWriter(writer)
059 *   .setCharset(cs)
060 *   .get();
061 * </pre>
062 * <p>
063 * {@link WriterOutputStream} implements the same transformation as {@link java.io.InputStreamReader}, except that the control flow is reversed: both classes
064 * transform a byte stream into a character stream, but {@link java.io.InputStreamReader} pulls data from the underlying stream, while
065 * {@link WriterOutputStream} pushes it to the underlying stream.
066 * </p>
067 * <p>
068 * Note that while there are use cases where there is no alternative to using this class, very often the need to use this class is an indication of a flaw in
069 * the design of the code. This class is typically used in situations where an existing API only accepts an {@link OutputStream} object, but where the stream is
070 * known to represent character data that must be decoded for further use.
071 * </p>
072 * <p>
073 * Instances of {@link WriterOutputStream} are not thread safe.
074 * </p>
075 *
076 * @see Builder
077 * @see org.apache.commons.io.input.ReaderInputStream
078 * @since 2.0
079 */
080public class WriterOutputStream extends OutputStream {
081
082    // @formatter:off
083    /**
084     * Builds a new {@link WriterOutputStream}.
085     *
086     * <p>
087     * For example:
088     * </p>
089     * <pre>{@code
090     * WriterOutputStream s = WriterOutputStream.builder()
091     *   .setPath(path)
092     *   .setBufferSize(8192)
093     *   .setCharset(StandardCharsets.UTF_8)
094     *   .setWriteImmediately(false)
095     *   .get();}
096     * </pre>
097     *
098     * @see #get()
099     * @since 2.12.0
100     */
101    // @formatter:on
102    public static class Builder extends AbstractStreamBuilder<WriterOutputStream, Builder> {
103
104        private CharsetDecoder charsetDecoder;
105        private boolean writeImmediately;
106
107        /**
108         * Constructs a new Builder.
109         */
110        public Builder() {
111            this.charsetDecoder = getCharset().newDecoder();
112        }
113
114        /**
115         * Builds a new {@link WriterOutputStream}.
116         * <p>
117         * You must set input that supports {@link #getWriter()} on this builder, otherwise, this method throws an exception.
118         * </p>
119         * <p>
120         * This builder use the following aspects:
121         * </p>
122         * <ul>
123         * <li>{@link #getWriter()}</li>
124         * <li>{@link #getBufferSize()}</li>
125         * <li>charsetDecoder</li>
126         * <li>writeImmediately</li>
127         * </ul>
128         *
129         * @return a new instance.
130         * @throws UnsupportedOperationException if the origin cannot provide a Writer.
131         * @see #getWriter()
132         */
133        @SuppressWarnings("resource")
134        @Override
135        public WriterOutputStream get() throws IOException {
136            return new WriterOutputStream(getWriter(), charsetDecoder, getBufferSize(), writeImmediately);
137        }
138
139        @Override
140        public Builder setCharset(final Charset charset) {
141            super.setCharset(charset);
142            this.charsetDecoder = getCharset().newDecoder();
143            return this;
144        }
145
146        @Override
147        public Builder setCharset(final String charset) {
148            super.setCharset(charset);
149            this.charsetDecoder = getCharset().newDecoder();
150            return this;
151        }
152
153        /**
154         * Sets the charset decoder.
155         *
156         * @param charsetDecoder the charset decoder.
157         * @return this
158         */
159        public Builder setCharsetDecoder(final CharsetDecoder charsetDecoder) {
160            this.charsetDecoder = charsetDecoder != null ? charsetDecoder : getCharsetDefault().newDecoder();
161            super.setCharset(this.charsetDecoder.charset());
162            return this;
163        }
164
165        /**
166         * Sets whether the output buffer will be flushed after each write operation ({@code true}), i.e. all available data will be written to the underlying
167         * {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when {@link #flush()} or {@link #close()}
168         * is called.
169         *
170         * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, i.e. all available data will be written to the
171         *                         underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
172         *                         {@link #flush()} or {@link #close()} is called.
173         * @return this
174         */
175        public Builder setWriteImmediately(final boolean writeImmediately) {
176            this.writeImmediately = writeImmediately;
177            return this;
178        }
179
180    }
181
182    private static final int BUFFER_SIZE = IOUtils.DEFAULT_BUFFER_SIZE;
183
184    /**
185     * Constructs a new {@link Builder}.
186     *
187     * @return a new {@link Builder}.
188     * @since 2.12.0
189     */
190    public static Builder builder() {
191        return new Builder();
192    }
193
194    /**
195     * Checks if the JDK in use properly supports the given charset.
196     *
197     * @param charset the charset to check the support for
198     */
199    private static void checkIbmJdkWithBrokenUTF16(final Charset charset) {
200        if (!StandardCharsets.UTF_16.name().equals(charset.name())) {
201            return;
202        }
203        final String TEST_STRING_2 = "v\u00e9s";
204        final byte[] bytes = TEST_STRING_2.getBytes(charset);
205
206        final CharsetDecoder charsetDecoder2 = charset.newDecoder();
207        final ByteBuffer bb2 = ByteBuffer.allocate(16);
208        final CharBuffer cb2 = CharBuffer.allocate(TEST_STRING_2.length());
209        final int len = bytes.length;
210        for (int i = 0; i < len; i++) {
211            bb2.put(bytes[i]);
212            bb2.flip();
213            try {
214                charsetDecoder2.decode(bb2, cb2, i == len - 1);
215            } catch (final IllegalArgumentException e) {
216                throw new UnsupportedOperationException("UTF-16 requested when running on an IBM JDK with broken UTF-16 support. "
217                        + "Please find a JDK that supports UTF-16 if you intend to use UF-16 with WriterOutputStream");
218            }
219            bb2.compact();
220        }
221        cb2.rewind();
222        if (!TEST_STRING_2.equals(cb2.toString())) {
223            throw new UnsupportedOperationException("UTF-16 requested when running on an IBM JDK with broken UTF-16 support. "
224                    + "Please find a JDK that supports UTF-16 if you intend to use UF-16 with WriterOutputStream");
225        }
226
227    }
228
229    private final Writer writer;
230    private final CharsetDecoder decoder;
231
232    private final boolean writeImmediately;
233
234    /**
235     * ByteBuffer used as input for the decoder. This buffer can be small as it is used only to transfer the received data to the decoder.
236     */
237    private final ByteBuffer decoderIn = ByteBuffer.allocate(128);
238
239    /**
240     * CharBuffer used as output for the decoder. It should be somewhat larger as we write from this buffer to the underlying Writer.
241     */
242    private final CharBuffer decoderOut;
243
244    /**
245     * Constructs a new {@link WriterOutputStream} that uses the default character encoding and with a default output buffer size of {@value #BUFFER_SIZE}
246     * characters. The output buffer will only be flushed when it overflows or when {@link #flush()} or {@link #close()} is called.
247     *
248     * @param writer the target {@link Writer}
249     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
250     */
251    @Deprecated
252    public WriterOutputStream(final Writer writer) {
253        this(writer, Charset.defaultCharset(), BUFFER_SIZE, false);
254    }
255
256    /**
257     * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed
258     * when it overflows or when {@link #flush()} or {@link #close()} is called.
259     *
260     * @param writer  the target {@link Writer}
261     * @param charset the charset encoding
262     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
263     */
264    @Deprecated
265    public WriterOutputStream(final Writer writer, final Charset charset) {
266        this(writer, charset, BUFFER_SIZE, false);
267    }
268
269    /**
270     * Constructs a new {@link WriterOutputStream}.
271     *
272     * @param writer           the target {@link Writer}
273     * @param charset          the charset encoding
274     * @param bufferSize       the size of the output buffer in number of characters
275     * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, i.e. all available data will be written to the
276     *                         underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
277     *                         {@link #flush()} or {@link #close()} is called.
278     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
279     */
280    @Deprecated
281    public WriterOutputStream(final Writer writer, final Charset charset, final int bufferSize, final boolean writeImmediately) {
282        // @formatter:off
283        this(writer,
284            Charsets.toCharset(charset).newDecoder()
285                    .onMalformedInput(CodingErrorAction.REPLACE)
286                    .onUnmappableCharacter(CodingErrorAction.REPLACE)
287                    .replaceWith("?"),
288             bufferSize,
289             writeImmediately);
290        // @formatter:on
291    }
292
293    /**
294     * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed
295     * when it overflows or when {@link #flush()} or {@link #close()} is called.
296     *
297     * @param writer  the target {@link Writer}
298     * @param decoder the charset decoder
299     * @since 2.1
300     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
301     */
302    @Deprecated
303    public WriterOutputStream(final Writer writer, final CharsetDecoder decoder) {
304        this(writer, decoder, BUFFER_SIZE, false);
305    }
306
307    /**
308     * Constructs a new {@link WriterOutputStream}.
309     *
310     * @param writer           the target {@link Writer}
311     * @param decoder          the charset decoder
312     * @param bufferSize       the size of the output buffer in number of characters
313     * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, i.e. all available data will be written to the
314     *                         underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
315     *                         {@link #flush()} or {@link #close()} is called.
316     * @since 2.1
317     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
318     */
319    @Deprecated
320    public WriterOutputStream(final Writer writer, final CharsetDecoder decoder, final int bufferSize, final boolean writeImmediately) {
321        checkIbmJdkWithBrokenUTF16(CharsetDecoders.toCharsetDecoder(decoder).charset());
322        this.writer = writer;
323        this.decoder = CharsetDecoders.toCharsetDecoder(decoder);
324        this.writeImmediately = writeImmediately;
325        this.decoderOut = CharBuffer.allocate(bufferSize);
326    }
327
328    /**
329     * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed
330     * when it overflows or when {@link #flush()} or {@link #close()} is called.
331     *
332     * @param writer      the target {@link Writer}
333     * @param charsetName the name of the charset encoding
334     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
335     */
336    @Deprecated
337    public WriterOutputStream(final Writer writer, final String charsetName) {
338        this(writer, charsetName, BUFFER_SIZE, false);
339    }
340
341    /**
342     * Constructs a new {@link WriterOutputStream}.
343     *
344     * @param writer           the target {@link Writer}
345     * @param charsetName      the name of the charset encoding
346     * @param bufferSize       the size of the output buffer in number of characters
347     * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, i.e. all available data will be written to the
348     *                         underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
349     *                         {@link #flush()} or {@link #close()} is called.
350     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
351     */
352    @Deprecated
353    public WriterOutputStream(final Writer writer, final String charsetName, final int bufferSize, final boolean writeImmediately) {
354        this(writer, Charsets.toCharset(charsetName), bufferSize, writeImmediately);
355    }
356
357    /**
358     * Close the stream. Any remaining content accumulated in the output buffer will be written to the underlying {@link Writer}. After that
359     * {@link Writer#close()} will be called.
360     *
361     * @throws IOException if an I/O error occurs.
362     */
363    @Override
364    public void close() throws IOException {
365        processInput(true);
366        flushOutput();
367        writer.close();
368    }
369
370    /**
371     * Flush the stream. Any remaining content accumulated in the output buffer will be written to the underlying {@link Writer}. After that
372     * {@link Writer#flush()} will be called.
373     *
374     * @throws IOException if an I/O error occurs.
375     */
376    @Override
377    public void flush() throws IOException {
378        flushOutput();
379        writer.flush();
380    }
381
382    /**
383     * Flush the output.
384     *
385     * @throws IOException if an I/O error occurs.
386     */
387    private void flushOutput() throws IOException {
388        if (decoderOut.position() > 0) {
389            writer.write(decoderOut.array(), 0, decoderOut.position());
390            decoderOut.rewind();
391        }
392    }
393
394    /**
395     * Decode the contents of the input ByteBuffer into a CharBuffer.
396     *
397     * @param endOfInput indicates end of input
398     * @throws IOException if an I/O error occurs.
399     */
400    private void processInput(final boolean endOfInput) throws IOException {
401        // Prepare decoderIn for reading
402        decoderIn.flip();
403        CoderResult coderResult;
404        while (true) {
405            coderResult = decoder.decode(decoderIn, decoderOut, endOfInput);
406            if (coderResult.isOverflow()) {
407                flushOutput();
408            } else if (coderResult.isUnderflow()) {
409                break;
410            } else {
411                // The decoder is configured to replace malformed input and unmappable characters,
412                // so we should not get here.
413                throw new IOException("Unexpected coder result");
414            }
415        }
416        // Discard the bytes that have been read
417        decoderIn.compact();
418    }
419
420    /**
421     * Write bytes from the specified byte array to the stream.
422     *
423     * @param b the byte array containing the bytes to write
424     * @throws IOException if an I/O error occurs.
425     */
426    @Override
427    public void write(final byte[] b) throws IOException {
428        write(b, 0, b.length);
429    }
430
431    /**
432     * Write bytes from the specified byte array to the stream.
433     *
434     * @param b   the byte array containing the bytes to write
435     * @param off the start offset in the byte array
436     * @param len the number of bytes to write
437     * @throws IOException if an I/O error occurs.
438     */
439    @Override
440    public void write(final byte[] b, int off, int len) throws IOException {
441        while (len > 0) {
442            final int c = Math.min(len, decoderIn.remaining());
443            decoderIn.put(b, off, c);
444            processInput(false);
445            len -= c;
446            off += c;
447        }
448        if (writeImmediately) {
449            flushOutput();
450        }
451    }
452
453    /**
454     * Write a single byte to the stream.
455     *
456     * @param b the byte to write
457     * @throws IOException if an I/O error occurs.
458     */
459    @Override
460    public void write(final int b) throws IOException {
461        write(new byte[] { (byte) b }, 0, 1);
462    }
463}