WriterOutputStream.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      https://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.apache.commons.io.output;

  18. import java.io.BufferedWriter;
  19. import java.io.IOException;
  20. import java.io.InputStreamReader;
  21. import java.io.OutputStream;
  22. import java.io.OutputStreamWriter;
  23. import java.io.Writer;
  24. import java.nio.ByteBuffer;
  25. import java.nio.CharBuffer;
  26. import java.nio.charset.Charset;
  27. import java.nio.charset.CharsetDecoder;
  28. import java.nio.charset.CoderResult;
  29. import java.nio.charset.CodingErrorAction;
  30. import java.nio.charset.StandardCharsets;

  31. import org.apache.commons.io.Charsets;
  32. import org.apache.commons.io.IOUtils;
  33. import org.apache.commons.io.build.AbstractStreamBuilder;
  34. import org.apache.commons.io.charset.CharsetDecoders;

  35. /**
  36.  * {@link OutputStream} implementation that transforms a byte stream to a character stream using a specified charset encoding and writes the resulting stream to
  37.  * a {@link Writer}. The stream is transformed using a {@link CharsetDecoder} object, guaranteeing that all charset encodings supported by the JRE are handled
  38.  * correctly.
  39.  * <p>
  40.  * The output of the {@link CharsetDecoder} is buffered using a fixed size buffer. This implies that the data is written to the underlying {@link Writer} in
  41.  * chunks that are no larger than the size of this buffer. By default, the buffer is flushed only when it overflows or when {@link #flush()} or {@link #close()}
  42.  * is called. In general there is therefore no need to wrap the underlying {@link Writer} in a {@link BufferedWriter}. {@link WriterOutputStream} can
  43.  * also be instructed to flush the buffer after each write operation. In this case, all available data is written immediately to the underlying {@link Writer},
  44.  * implying that the current position of the {@link Writer} is correlated to the current position of the {@link WriterOutputStream}.
  45.  * </p>
  46.  * <p>
  47.  * {@link WriterOutputStream} implements the inverse transformation of {@link OutputStreamWriter}; in the following example, writing to {@code out2}
  48.  * would have the same result as writing to {@code out} directly (provided that the byte sequence is legal with respect to the charset encoding):
  49.  * </p>
  50.  * <p>
  51.  * To build an instance, use {@link Builder}.
  52.  * </p>
  53.  * <pre>
  54.  * OutputStream out = ...
  55.  * Charset cs = ...
  56.  * OutputStreamWriter writer = new OutputStreamWriter(out, cs);
  57.  * WriterOutputStream out2 = WriterOutputStream.builder()
  58.  *   .setWriter(writer)
  59.  *   .setCharset(cs)
  60.  *   .get();
  61.  * </pre>
  62.  * <p>
  63.  * {@link WriterOutputStream} implements the same transformation as {@link InputStreamReader}, except that the control flow is reversed: both classes
  64.  * transform a byte stream into a character stream, but {@link InputStreamReader} pulls data from the underlying stream, while
  65.  * {@link WriterOutputStream} pushes it to the underlying stream.
  66.  * </p>
  67.  * <p>
  68.  * Note that while there are use cases where there is no alternative to using this class, very often the need to use this class is an indication of a flaw in
  69.  * the design of the code. This class is typically used in situations where an existing API only accepts an {@link OutputStream} object, but where the stream is
  70.  * known to represent character data that must be decoded for further use.
  71.  * </p>
  72.  * <p>
  73.  * Instances of {@link WriterOutputStream} are not thread safe.
  74.  * </p>
  75.  *
  76.  * @see Builder
  77.  * @see org.apache.commons.io.input.ReaderInputStream
  78.  * @since 2.0
  79.  */
  80. public class WriterOutputStream extends OutputStream {

  81.     // @formatter:off
  82.     /**
  83.      * Builds a new {@link WriterOutputStream}.
  84.      *
  85.      * <p>
  86.      * For example:
  87.      * </p>
  88.      * <pre>{@code
  89.      * WriterOutputStream s = WriterOutputStream.builder()
  90.      *   .setPath(path)
  91.      *   .setBufferSize(8192)
  92.      *   .setCharset(StandardCharsets.UTF_8)
  93.      *   .setWriteImmediately(false)
  94.      *   .get();}
  95.      * </pre>
  96.      *
  97.      * @see #get()
  98.      * @since 2.12.0
  99.      */
  100.     // @formatter:on
  101.     public static class Builder extends AbstractStreamBuilder<WriterOutputStream, Builder> {

  102.         private CharsetDecoder charsetDecoder;
  103.         private boolean writeImmediately;

  104.         /**
  105.          * Constructs a new builder of {@link WriterOutputStream}.
  106.          */
  107.         public Builder() {
  108.             this.charsetDecoder = getCharset().newDecoder();
  109.         }

  110.         /**
  111.          * Builds a new {@link WriterOutputStream}.
  112.          * <p>
  113.          * You must set an aspect that supports {@link #getWriter()} on this builder, otherwise, this method throws an exception.
  114.          * </p>
  115.          * <p>
  116.          * This builder uses the following aspects:
  117.          * </p>
  118.          * <ul>
  119.          * <li>{@link #getWriter()}</li>
  120.          * <li>{@link #getBufferSize()}</li>
  121.          * <li>charsetDecoder</li>
  122.          * <li>writeImmediately</li>
  123.          * </ul>
  124.          *
  125.          * @return a new instance.
  126.          * @throws UnsupportedOperationException if the origin cannot provide a {@link Writer}.
  127.          * @throws IOException                   if an I/O error occurs converting to an {@link Writer} using {@link #getWriter()}.
  128.          * @see #getWriter()
  129.          * @see #getUnchecked()
  130.          */
  131.         @Override
  132.         public WriterOutputStream get() throws IOException {
  133.             return new WriterOutputStream(this);
  134.         }

  135.         @Override
  136.         public Builder setCharset(final Charset charset) {
  137.             super.setCharset(charset);
  138.             this.charsetDecoder = getCharset().newDecoder();
  139.             return this;
  140.         }

  141.         @Override
  142.         public Builder setCharset(final String charset) {
  143.             super.setCharset(charset);
  144.             this.charsetDecoder = getCharset().newDecoder();
  145.             return this;
  146.         }

  147.         /**
  148.          * Sets the charset decoder.
  149.          *
  150.          * @param charsetDecoder the charset decoder.
  151.          * @return {@code this} instance.
  152.          */
  153.         public Builder setCharsetDecoder(final CharsetDecoder charsetDecoder) {
  154.             this.charsetDecoder = charsetDecoder != null ? charsetDecoder : getCharsetDefault().newDecoder();
  155.             super.setCharset(this.charsetDecoder.charset());
  156.             return this;
  157.         }

  158.         /**
  159.          * Sets whether the output buffer will be flushed after each write operation ({@code true}), meaning all available data will be written to the
  160.          * underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when {@link #flush()} or
  161.          * {@link #close()} is called.
  162.          *
  163.          * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, meaning all available data will be written to
  164.          *                         the underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
  165.          *                         {@link #flush()} or {@link #close()} is called.
  166.          * @return {@code this} instance.
  167.          */
  168.         public Builder setWriteImmediately(final boolean writeImmediately) {
  169.             this.writeImmediately = writeImmediately;
  170.             return this;
  171.         }

  172.     }

  173.     private static final int BUFFER_SIZE = IOUtils.DEFAULT_BUFFER_SIZE;

  174.     /**
  175.      * Constructs a new {@link Builder}.
  176.      *
  177.      * @return a new {@link Builder}.
  178.      * @since 2.12.0
  179.      */
  180.     public static Builder builder() {
  181.         return new Builder();
  182.     }

  183.     /**
  184.      * Checks if the JDK in use properly supports the given charset.
  185.      *
  186.      * @param charset the charset to check the support for
  187.      */
  188.     private static void checkIbmJdkWithBrokenUTF16(final Charset charset) {
  189.         if (!StandardCharsets.UTF_16.name().equals(charset.name())) {
  190.             return;
  191.         }
  192.         final String TEST_STRING_2 = "v\u00e9s";
  193.         final byte[] bytes = TEST_STRING_2.getBytes(charset);

  194.         final CharsetDecoder charsetDecoder2 = charset.newDecoder();
  195.         final ByteBuffer bb2 = ByteBuffer.allocate(16);
  196.         final CharBuffer cb2 = CharBuffer.allocate(TEST_STRING_2.length());
  197.         final int len = bytes.length;
  198.         for (int i = 0; i < len; i++) {
  199.             bb2.put(bytes[i]);
  200.             bb2.flip();
  201.             try {
  202.                 charsetDecoder2.decode(bb2, cb2, i == len - 1);
  203.             } catch (final IllegalArgumentException e) {
  204.                 throw new UnsupportedOperationException("UTF-16 requested when running on an IBM JDK with broken UTF-16 support. "
  205.                         + "Please find a JDK that supports UTF-16 if you intend to use UF-16 with WriterOutputStream");
  206.             }
  207.             bb2.compact();
  208.         }
  209.         cb2.rewind();
  210.         if (!TEST_STRING_2.equals(cb2.toString())) {
  211.             throw new UnsupportedOperationException("UTF-16 requested when running on an IBM JDK with broken UTF-16 support. "
  212.                     + "Please find a JDK that supports UTF-16 if you intend to use UF-16 with WriterOutputStream");
  213.         }

  214.     }

  215.     private final Writer writer;
  216.     private final CharsetDecoder decoder;

  217.     private final boolean writeImmediately;

  218.     /**
  219.      * ByteBuffer used as input for the decoder. This buffer can be small as it is used only to transfer the received data to the decoder.
  220.      */
  221.     private final ByteBuffer decoderIn = ByteBuffer.allocate(128);

  222.     /**
  223.      * CharBuffer used as output for the decoder. It should be somewhat larger as we write from this buffer to the underlying Writer.
  224.      */
  225.     private final CharBuffer decoderOut;

  226.     @SuppressWarnings("resource") // caller closes.
  227.     private WriterOutputStream(final Builder builder) throws IOException {
  228.         this(builder.getWriter(), builder.charsetDecoder, builder.getBufferSize(), builder.writeImmediately);
  229.     }

  230.     /**
  231.      * Constructs a new {@link WriterOutputStream} that uses the virtual machine's {@link Charset#defaultCharset() default charset} and with a default output
  232.      * buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed when it overflows or when {@link #flush()} or {@link #close()} is
  233.      * called.
  234.      *
  235.      * @param writer the target {@link Writer}
  236.      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
  237.      */
  238.     @Deprecated
  239.     public WriterOutputStream(final Writer writer) {
  240.         this(writer, Charset.defaultCharset(), BUFFER_SIZE, false);
  241.     }

  242.     /**
  243.      * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed
  244.      * when it overflows or when {@link #flush()} or {@link #close()} is called.
  245.      *
  246.      * @param writer  the target {@link Writer}
  247.      * @param charset the charset encoding
  248.      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
  249.      */
  250.     @Deprecated
  251.     public WriterOutputStream(final Writer writer, final Charset charset) {
  252.         this(writer, charset, BUFFER_SIZE, false);
  253.     }

  254.     /**
  255.      * Constructs a new {@link WriterOutputStream}.
  256.      *
  257.      * @param writer           the target {@link Writer}
  258.      * @param charset          the charset encoding
  259.      * @param bufferSize       the size of the output buffer in number of characters
  260.      * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, meaning all available data will be written to the
  261.      *                         underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
  262.      *                         {@link #flush()} or {@link #close()} is called.
  263.      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
  264.      */
  265.     @Deprecated
  266.     public WriterOutputStream(final Writer writer, final Charset charset, final int bufferSize, final boolean writeImmediately) {
  267.         // @formatter:off
  268.         this(writer,
  269.             Charsets.toCharset(charset).newDecoder()
  270.                     .onMalformedInput(CodingErrorAction.REPLACE)
  271.                     .onUnmappableCharacter(CodingErrorAction.REPLACE)
  272.                     .replaceWith("?"),
  273.              bufferSize,
  274.              writeImmediately);
  275.         // @formatter:on
  276.     }

  277.     /**
  278.      * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed
  279.      * when it overflows or when {@link #flush()} or {@link #close()} is called.
  280.      *
  281.      * @param writer  the target {@link Writer}
  282.      * @param decoder the charset decoder
  283.      * @since 2.1
  284.      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
  285.      */
  286.     @Deprecated
  287.     public WriterOutputStream(final Writer writer, final CharsetDecoder decoder) {
  288.         this(writer, decoder, BUFFER_SIZE, false);
  289.     }

  290.     /**
  291.      * Constructs a new {@link WriterOutputStream}.
  292.      *
  293.      * @param writer           the target {@link Writer}
  294.      * @param decoder          the charset decoder
  295.      * @param bufferSize       the size of the output buffer in number of characters
  296.      * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, meaning all available data will be written to the
  297.      *                         underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
  298.      *                         {@link #flush()} or {@link #close()} is called.
  299.      * @since 2.1
  300.      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
  301.      */
  302.     @Deprecated
  303.     public WriterOutputStream(final Writer writer, final CharsetDecoder decoder, final int bufferSize, final boolean writeImmediately) {
  304.         checkIbmJdkWithBrokenUTF16(CharsetDecoders.toCharsetDecoder(decoder).charset());
  305.         this.writer = writer;
  306.         this.decoder = CharsetDecoders.toCharsetDecoder(decoder);
  307.         this.writeImmediately = writeImmediately;
  308.         this.decoderOut = CharBuffer.allocate(bufferSize);
  309.     }

  310.     /**
  311.      * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed
  312.      * when it overflows or when {@link #flush()} or {@link #close()} is called.
  313.      *
  314.      * @param writer      the target {@link Writer}
  315.      * @param charsetName the name of the charset encoding
  316.      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
  317.      */
  318.     @Deprecated
  319.     public WriterOutputStream(final Writer writer, final String charsetName) {
  320.         this(writer, charsetName, BUFFER_SIZE, false);
  321.     }

  322.     /**
  323.      * Constructs a new {@link WriterOutputStream}.
  324.      *
  325.      * @param writer           the target {@link Writer}
  326.      * @param charsetName      the name of the charset encoding
  327.      * @param bufferSize       the size of the output buffer in number of characters
  328.      * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, meaning all available data will be written to the
  329.      *                         underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
  330.      *                         {@link #flush()} or {@link #close()} is called.
  331.      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
  332.      */
  333.     @Deprecated
  334.     public WriterOutputStream(final Writer writer, final String charsetName, final int bufferSize, final boolean writeImmediately) {
  335.         this(writer, Charsets.toCharset(charsetName), bufferSize, writeImmediately);
  336.     }

  337.     /**
  338.      * Close the stream. Any remaining content accumulated in the output buffer will be written to the underlying {@link Writer}. After that
  339.      * {@link Writer#close()} will be called.
  340.      *
  341.      * @throws IOException if an I/O error occurs.
  342.      */
  343.     @Override
  344.     public void close() throws IOException {
  345.         processInput(true);
  346.         flushOutput();
  347.         writer.close();
  348.     }

  349.     /**
  350.      * Flush the stream. Any remaining content accumulated in the output buffer will be written to the underlying {@link Writer}. After that
  351.      * {@link Writer#flush()} will be called.
  352.      *
  353.      * @throws IOException if an I/O error occurs.
  354.      */
  355.     @Override
  356.     public void flush() throws IOException {
  357.         flushOutput();
  358.         writer.flush();
  359.     }

  360.     /**
  361.      * Flush the output.
  362.      *
  363.      * @throws IOException if an I/O error occurs.
  364.      */
  365.     private void flushOutput() throws IOException {
  366.         if (decoderOut.position() > 0) {
  367.             writer.write(decoderOut.array(), 0, decoderOut.position());
  368.             decoderOut.rewind();
  369.         }
  370.     }

  371.     /**
  372.      * Decode the contents of the input ByteBuffer into a CharBuffer.
  373.      *
  374.      * @param endOfInput indicates end of input
  375.      * @throws IOException if an I/O error occurs.
  376.      */
  377.     private void processInput(final boolean endOfInput) throws IOException {
  378.         // Prepare decoderIn for reading
  379.         decoderIn.flip();
  380.         CoderResult coderResult;
  381.         while (true) {
  382.             coderResult = decoder.decode(decoderIn, decoderOut, endOfInput);
  383.             if (coderResult.isOverflow()) {
  384.                 flushOutput();
  385.             } else if (coderResult.isUnderflow()) {
  386.                 break;
  387.             } else {
  388.                 // The decoder is configured to replace malformed input and unmappable characters,
  389.                 // so we should not get here.
  390.                 throw new IOException("Unexpected coder result");
  391.             }
  392.         }
  393.         // Discard the bytes that have been read
  394.         decoderIn.compact();
  395.     }

  396.     /**
  397.      * Writes bytes from the specified byte array to the stream.
  398.      *
  399.      * @param b the byte array containing the bytes to write
  400.      * @throws IOException if an I/O error occurs.
  401.      */
  402.     @Override
  403.     public void write(final byte[] b) throws IOException {
  404.         write(b, 0, b.length);
  405.     }

  406.     /**
  407.      * Writes bytes from the specified byte array to the stream.
  408.      *
  409.      * @param b   the byte array containing the bytes to write
  410.      * @param off the start offset in the byte array
  411.      * @param len the number of bytes to write
  412.      * @throws IOException if an I/O error occurs.
  413.      */
  414.     @Override
  415.     public void write(final byte[] b, int off, int len) throws IOException {
  416.         while (len > 0) {
  417.             final int c = Math.min(len, decoderIn.remaining());
  418.             decoderIn.put(b, off, c);
  419.             processInput(false);
  420.             len -= c;
  421.             off += c;
  422.         }
  423.         if (writeImmediately) {
  424.             flushOutput();
  425.         }
  426.     }

  427.     /**
  428.      * Writes a single byte to the stream.
  429.      *
  430.      * @param b the byte to write
  431.      * @throws IOException if an I/O error occurs.
  432.      */
  433.     @Override
  434.     public void write(final int b) throws IOException {
  435.         write(new byte[] { (byte) b }, 0, 1);
  436.     }
  437. }