WriterOutputStream.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.apache.commons.io.output;

  18. import java.io.BufferedWriter;
  19. import java.io.IOException;
  20. import java.io.InputStreamReader;
  21. import java.io.OutputStream;
  22. import java.io.OutputStreamWriter;
  23. import java.io.Writer;
  24. import java.nio.ByteBuffer;
  25. import java.nio.CharBuffer;
  26. import java.nio.charset.Charset;
  27. import java.nio.charset.CharsetDecoder;
  28. import java.nio.charset.CoderResult;
  29. import java.nio.charset.CodingErrorAction;
  30. import java.nio.charset.StandardCharsets;

  31. import org.apache.commons.io.Charsets;
  32. import org.apache.commons.io.IOUtils;
  33. import org.apache.commons.io.build.AbstractStreamBuilder;
  34. import org.apache.commons.io.charset.CharsetDecoders;

  35. /**
  36.  * {@link OutputStream} implementation that transforms a byte stream to a character stream using a specified charset encoding and writes the resulting stream to
  37.  * a {@link Writer}. The stream is transformed using a {@link CharsetDecoder} object, guaranteeing that all charset encodings supported by the JRE are handled
  38.  * correctly.
  39.  * <p>
  40.  * The output of the {@link CharsetDecoder} is buffered using a fixed size buffer. This implies that the data is written to the underlying {@link Writer} in
  41.  * chunks that are no larger than the size of this buffer. By default, the buffer is flushed only when it overflows or when {@link #flush()} or {@link #close()}
  42.  * is called. In general there is therefore no need to wrap the underlying {@link Writer} in a {@link BufferedWriter}. {@link WriterOutputStream} can
  43.  * also be instructed to flush the buffer after each write operation. In this case, all available data is written immediately to the underlying {@link Writer},
  44.  * implying that the current position of the {@link Writer} is correlated to the current position of the {@link WriterOutputStream}.
  45.  * </p>
  46.  * <p>
  47.  * {@link WriterOutputStream} implements the inverse transformation of {@link OutputStreamWriter}; in the following example, writing to {@code out2}
  48.  * would have the same result as writing to {@code out} directly (provided that the byte sequence is legal with respect to the charset encoding):
  49.  * </p>
  50.  * <p>
  51.  * To build an instance, use {@link Builder}.
  52.  * </p>
  53.  * <pre>
  54.  * OutputStream out = ...
  55.  * Charset cs = ...
  56.  * OutputStreamWriter writer = new OutputStreamWriter(out, cs);
  57.  * WriterOutputStream out2 = WriterOutputStream.builder()
  58.  *   .setWriter(writer)
  59.  *   .setCharset(cs)
  60.  *   .get();
  61.  * </pre>
  62.  * <p>
  63.  * {@link WriterOutputStream} implements the same transformation as {@link InputStreamReader}, except that the control flow is reversed: both classes
  64.  * transform a byte stream into a character stream, but {@link InputStreamReader} pulls data from the underlying stream, while
  65.  * {@link WriterOutputStream} pushes it to the underlying stream.
  66.  * </p>
  67.  * <p>
  68.  * Note that while there are use cases where there is no alternative to using this class, very often the need to use this class is an indication of a flaw in
  69.  * the design of the code. This class is typically used in situations where an existing API only accepts an {@link OutputStream} object, but where the stream is
  70.  * known to represent character data that must be decoded for further use.
  71.  * </p>
  72.  * <p>
  73.  * Instances of {@link WriterOutputStream} are not thread safe.
  74.  * </p>
  75.  *
  76.  * @see Builder
  77.  * @see org.apache.commons.io.input.ReaderInputStream
  78.  * @since 2.0
  79.  */
  80. public class WriterOutputStream extends OutputStream {

  81.     // @formatter:off
  82.     /**
  83.      * Builds a new {@link WriterOutputStream}.
  84.      *
  85.      * <p>
  86.      * For example:
  87.      * </p>
  88.      * <pre>{@code
  89.      * WriterOutputStream s = WriterOutputStream.builder()
  90.      *   .setPath(path)
  91.      *   .setBufferSize(8192)
  92.      *   .setCharset(StandardCharsets.UTF_8)
  93.      *   .setWriteImmediately(false)
  94.      *   .get();}
  95.      * </pre>
  96.      *
  97.      * @see #get()
  98.      * @since 2.12.0
  99.      */
  100.     // @formatter:on
  101.     public static class Builder extends AbstractStreamBuilder<WriterOutputStream, Builder> {

  102.         private CharsetDecoder charsetDecoder;
  103.         private boolean writeImmediately;

  104.         /**
  105.          * Constructs a new builder of {@link WriterOutputStream}.
  106.          */
  107.         public Builder() {
  108.             this.charsetDecoder = getCharset().newDecoder();
  109.         }

  110.         /**
  111.          * Builds a new {@link WriterOutputStream}.
  112.          * <p>
  113.          * You must set an aspect that supports {@link #getWriter()} on this builder, otherwise, this method throws an exception.
  114.          * </p>
  115.          * <p>
  116.          * This builder uses the following aspects:
  117.          * </p>
  118.          * <ul>
  119.          * <li>{@link #getWriter()}</li>
  120.          * <li>{@link #getBufferSize()}</li>
  121.          * <li>charsetDecoder</li>
  122.          * <li>writeImmediately</li>
  123.          * </ul>
  124.          *
  125.          * @return a new instance.
  126.          * @throws UnsupportedOperationException if the origin cannot provide a {@link Writer}.
  127.          * @throws IOException                   if an I/O error occurs converting to an {@link Writer} using {@link #getWriter()}.
  128.          * @see #getWriter()
  129.          * @see #getUnchecked()
  130.          */
  131.         @Override
  132.         public WriterOutputStream get() throws IOException {
  133.             return new WriterOutputStream(getWriter(), charsetDecoder, getBufferSize(), writeImmediately);
  134.         }

  135.         @Override
  136.         public Builder setCharset(final Charset charset) {
  137.             super.setCharset(charset);
  138.             this.charsetDecoder = getCharset().newDecoder();
  139.             return this;
  140.         }

  141.         @Override
  142.         public Builder setCharset(final String charset) {
  143.             super.setCharset(charset);
  144.             this.charsetDecoder = getCharset().newDecoder();
  145.             return this;
  146.         }

  147.         /**
  148.          * Sets the charset decoder.
  149.          *
  150.          * @param charsetDecoder the charset decoder.
  151.          * @return {@code this} instance.
  152.          */
  153.         public Builder setCharsetDecoder(final CharsetDecoder charsetDecoder) {
  154.             this.charsetDecoder = charsetDecoder != null ? charsetDecoder : getCharsetDefault().newDecoder();
  155.             super.setCharset(this.charsetDecoder.charset());
  156.             return this;
  157.         }

  158.         /**
  159.          * Sets whether the output buffer will be flushed after each write operation ({@code true}), meaning all available data will be written to the
  160.          * underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when {@link #flush()} or
  161.          * {@link #close()} is called.
  162.          *
  163.          * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, meaning all available data will be written to
  164.          *                         the underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
  165.          *                         {@link #flush()} or {@link #close()} is called.
  166.          * @return {@code this} instance.
  167.          */
  168.         public Builder setWriteImmediately(final boolean writeImmediately) {
  169.             this.writeImmediately = writeImmediately;
  170.             return this;
  171.         }

  172.     }

  173.     private static final int BUFFER_SIZE = IOUtils.DEFAULT_BUFFER_SIZE;

  174.     /**
  175.      * Constructs a new {@link Builder}.
  176.      *
  177.      * @return a new {@link Builder}.
  178.      * @since 2.12.0
  179.      */
  180.     public static Builder builder() {
  181.         return new Builder();
  182.     }

  183.     /**
  184.      * Checks if the JDK in use properly supports the given charset.
  185.      *
  186.      * @param charset the charset to check the support for
  187.      */
  188.     private static void checkIbmJdkWithBrokenUTF16(final Charset charset) {
  189.         if (!StandardCharsets.UTF_16.name().equals(charset.name())) {
  190.             return;
  191.         }
  192.         final String TEST_STRING_2 = "v\u00e9s";
  193.         final byte[] bytes = TEST_STRING_2.getBytes(charset);

  194.         final CharsetDecoder charsetDecoder2 = charset.newDecoder();
  195.         final ByteBuffer bb2 = ByteBuffer.allocate(16);
  196.         final CharBuffer cb2 = CharBuffer.allocate(TEST_STRING_2.length());
  197.         final int len = bytes.length;
  198.         for (int i = 0; i < len; i++) {
  199.             bb2.put(bytes[i]);
  200.             bb2.flip();
  201.             try {
  202.                 charsetDecoder2.decode(bb2, cb2, i == len - 1);
  203.             } catch (final IllegalArgumentException e) {
  204.                 throw new UnsupportedOperationException("UTF-16 requested when running on an IBM JDK with broken UTF-16 support. "
  205.                         + "Please find a JDK that supports UTF-16 if you intend to use UF-16 with WriterOutputStream");
  206.             }
  207.             bb2.compact();
  208.         }
  209.         cb2.rewind();
  210.         if (!TEST_STRING_2.equals(cb2.toString())) {
  211.             throw new UnsupportedOperationException("UTF-16 requested when running on an IBM JDK with broken UTF-16 support. "
  212.                     + "Please find a JDK that supports UTF-16 if you intend to use UF-16 with WriterOutputStream");
  213.         }

  214.     }

  215.     private final Writer writer;
  216.     private final CharsetDecoder decoder;

  217.     private final boolean writeImmediately;

  218.     /**
  219.      * ByteBuffer used as input for the decoder. This buffer can be small as it is used only to transfer the received data to the decoder.
  220.      */
  221.     private final ByteBuffer decoderIn = ByteBuffer.allocate(128);

  222.     /**
  223.      * CharBuffer used as output for the decoder. It should be somewhat larger as we write from this buffer to the underlying Writer.
  224.      */
  225.     private final CharBuffer decoderOut;

  226.     /**
  227.      * Constructs a new {@link WriterOutputStream} that uses the virtual machine's {@link Charset#defaultCharset() default charset} and with a default output
  228.      * buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed when it overflows or when {@link #flush()} or {@link #close()} is
  229.      * called.
  230.      *
  231.      * @param writer the target {@link Writer}
  232.      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
  233.      */
  234.     @Deprecated
  235.     public WriterOutputStream(final Writer writer) {
  236.         this(writer, Charset.defaultCharset(), BUFFER_SIZE, false);
  237.     }

  238.     /**
  239.      * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed
  240.      * when it overflows or when {@link #flush()} or {@link #close()} is called.
  241.      *
  242.      * @param writer  the target {@link Writer}
  243.      * @param charset the charset encoding
  244.      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
  245.      */
  246.     @Deprecated
  247.     public WriterOutputStream(final Writer writer, final Charset charset) {
  248.         this(writer, charset, BUFFER_SIZE, false);
  249.     }

  250.     /**
  251.      * Constructs a new {@link WriterOutputStream}.
  252.      *
  253.      * @param writer           the target {@link Writer}
  254.      * @param charset          the charset encoding
  255.      * @param bufferSize       the size of the output buffer in number of characters
  256.      * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, meaning all available data will be written to the
  257.      *                         underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
  258.      *                         {@link #flush()} or {@link #close()} is called.
  259.      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
  260.      */
  261.     @Deprecated
  262.     public WriterOutputStream(final Writer writer, final Charset charset, final int bufferSize, final boolean writeImmediately) {
  263.         // @formatter:off
  264.         this(writer,
  265.             Charsets.toCharset(charset).newDecoder()
  266.                     .onMalformedInput(CodingErrorAction.REPLACE)
  267.                     .onUnmappableCharacter(CodingErrorAction.REPLACE)
  268.                     .replaceWith("?"),
  269.              bufferSize,
  270.              writeImmediately);
  271.         // @formatter:on
  272.     }

  273.     /**
  274.      * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed
  275.      * when it overflows or when {@link #flush()} or {@link #close()} is called.
  276.      *
  277.      * @param writer  the target {@link Writer}
  278.      * @param decoder the charset decoder
  279.      * @since 2.1
  280.      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
  281.      */
  282.     @Deprecated
  283.     public WriterOutputStream(final Writer writer, final CharsetDecoder decoder) {
  284.         this(writer, decoder, BUFFER_SIZE, false);
  285.     }

  286.     /**
  287.      * Constructs a new {@link WriterOutputStream}.
  288.      *
  289.      * @param writer           the target {@link Writer}
  290.      * @param decoder          the charset decoder
  291.      * @param bufferSize       the size of the output buffer in number of characters
  292.      * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, meaning all available data will be written to the
  293.      *                         underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
  294.      *                         {@link #flush()} or {@link #close()} is called.
  295.      * @since 2.1
  296.      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
  297.      */
  298.     @Deprecated
  299.     public WriterOutputStream(final Writer writer, final CharsetDecoder decoder, final int bufferSize, final boolean writeImmediately) {
  300.         checkIbmJdkWithBrokenUTF16(CharsetDecoders.toCharsetDecoder(decoder).charset());
  301.         this.writer = writer;
  302.         this.decoder = CharsetDecoders.toCharsetDecoder(decoder);
  303.         this.writeImmediately = writeImmediately;
  304.         this.decoderOut = CharBuffer.allocate(bufferSize);
  305.     }

  306.     /**
  307.      * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed
  308.      * when it overflows or when {@link #flush()} or {@link #close()} is called.
  309.      *
  310.      * @param writer      the target {@link Writer}
  311.      * @param charsetName the name of the charset encoding
  312.      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
  313.      */
  314.     @Deprecated
  315.     public WriterOutputStream(final Writer writer, final String charsetName) {
  316.         this(writer, charsetName, BUFFER_SIZE, false);
  317.     }

  318.     /**
  319.      * Constructs a new {@link WriterOutputStream}.
  320.      *
  321.      * @param writer           the target {@link Writer}
  322.      * @param charsetName      the name of the charset encoding
  323.      * @param bufferSize       the size of the output buffer in number of characters
  324.      * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, meaning all available data will be written to the
  325.      *                         underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
  326.      *                         {@link #flush()} or {@link #close()} is called.
  327.      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
  328.      */
  329.     @Deprecated
  330.     public WriterOutputStream(final Writer writer, final String charsetName, final int bufferSize, final boolean writeImmediately) {
  331.         this(writer, Charsets.toCharset(charsetName), bufferSize, writeImmediately);
  332.     }

  333.     /**
  334.      * Close the stream. Any remaining content accumulated in the output buffer will be written to the underlying {@link Writer}. After that
  335.      * {@link Writer#close()} will be called.
  336.      *
  337.      * @throws IOException if an I/O error occurs.
  338.      */
  339.     @Override
  340.     public void close() throws IOException {
  341.         processInput(true);
  342.         flushOutput();
  343.         writer.close();
  344.     }

  345.     /**
  346.      * Flush the stream. Any remaining content accumulated in the output buffer will be written to the underlying {@link Writer}. After that
  347.      * {@link Writer#flush()} will be called.
  348.      *
  349.      * @throws IOException if an I/O error occurs.
  350.      */
  351.     @Override
  352.     public void flush() throws IOException {
  353.         flushOutput();
  354.         writer.flush();
  355.     }

  356.     /**
  357.      * Flush the output.
  358.      *
  359.      * @throws IOException if an I/O error occurs.
  360.      */
  361.     private void flushOutput() throws IOException {
  362.         if (decoderOut.position() > 0) {
  363.             writer.write(decoderOut.array(), 0, decoderOut.position());
  364.             decoderOut.rewind();
  365.         }
  366.     }

  367.     /**
  368.      * Decode the contents of the input ByteBuffer into a CharBuffer.
  369.      *
  370.      * @param endOfInput indicates end of input
  371.      * @throws IOException if an I/O error occurs.
  372.      */
  373.     private void processInput(final boolean endOfInput) throws IOException {
  374.         // Prepare decoderIn for reading
  375.         decoderIn.flip();
  376.         CoderResult coderResult;
  377.         while (true) {
  378.             coderResult = decoder.decode(decoderIn, decoderOut, endOfInput);
  379.             if (coderResult.isOverflow()) {
  380.                 flushOutput();
  381.             } else if (coderResult.isUnderflow()) {
  382.                 break;
  383.             } else {
  384.                 // The decoder is configured to replace malformed input and unmappable characters,
  385.                 // so we should not get here.
  386.                 throw new IOException("Unexpected coder result");
  387.             }
  388.         }
  389.         // Discard the bytes that have been read
  390.         decoderIn.compact();
  391.     }

  392.     /**
  393.      * Write bytes from the specified byte array to the stream.
  394.      *
  395.      * @param b the byte array containing the bytes to write
  396.      * @throws IOException if an I/O error occurs.
  397.      */
  398.     @Override
  399.     public void write(final byte[] b) throws IOException {
  400.         write(b, 0, b.length);
  401.     }

  402.     /**
  403.      * Write bytes from the specified byte array to the stream.
  404.      *
  405.      * @param b   the byte array containing the bytes to write
  406.      * @param off the start offset in the byte array
  407.      * @param len the number of bytes to write
  408.      * @throws IOException if an I/O error occurs.
  409.      */
  410.     @Override
  411.     public void write(final byte[] b, int off, int len) throws IOException {
  412.         while (len > 0) {
  413.             final int c = Math.min(len, decoderIn.remaining());
  414.             decoderIn.put(b, off, c);
  415.             processInput(false);
  416.             len -= c;
  417.             off += c;
  418.         }
  419.         if (writeImmediately) {
  420.             flushOutput();
  421.         }
  422.     }

  423.     /**
  424.      * Write a single byte to the stream.
  425.      *
  426.      * @param b the byte to write
  427.      * @throws IOException if an I/O error occurs.
  428.      */
  429.     @Override
  430.     public void write(final int b) throws IOException {
  431.         write(new byte[] { (byte) b }, 0, 1);
  432.     }
  433. }