WriterOutputStream.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.apache.commons.io.output;

  18. import java.io.BufferedWriter;
  19. import java.io.IOException;
  20. import java.io.InputStreamReader;
  21. import java.io.OutputStream;
  22. import java.io.OutputStreamWriter;
  23. import java.io.Writer;
  24. import java.nio.ByteBuffer;
  25. import java.nio.CharBuffer;
  26. import java.nio.charset.Charset;
  27. import java.nio.charset.CharsetDecoder;
  28. import java.nio.charset.CoderResult;
  29. import java.nio.charset.CodingErrorAction;
  30. import java.nio.charset.StandardCharsets;

  31. import org.apache.commons.io.Charsets;
  32. import org.apache.commons.io.IOUtils;
  33. import org.apache.commons.io.build.AbstractStreamBuilder;
  34. import org.apache.commons.io.charset.CharsetDecoders;

  35. /**
  36.  * {@link OutputStream} implementation that transforms a byte stream to a character stream using a specified charset encoding and writes the resulting stream to
  37.  * a {@link Writer}. The stream is transformed using a {@link CharsetDecoder} object, guaranteeing that all charset encodings supported by the JRE are handled
  38.  * correctly.
  39.  * <p>
  40.  * The output of the {@link CharsetDecoder} is buffered using a fixed size buffer. This implies that the data is written to the underlying {@link Writer} in
  41.  * chunks that are no larger than the size of this buffer. By default, the buffer is flushed only when it overflows or when {@link #flush()} or {@link #close()}
  42.  * is called. In general there is therefore no need to wrap the underlying {@link Writer} in a {@link BufferedWriter}. {@link WriterOutputStream} can
  43.  * also be instructed to flush the buffer after each write operation. In this case, all available data is written immediately to the underlying {@link Writer},
  44.  * implying that the current position of the {@link Writer} is correlated to the current position of the {@link WriterOutputStream}.
  45.  * </p>
  46.  * <p>
  47.  * {@link WriterOutputStream} implements the inverse transformation of {@link OutputStreamWriter}; in the following example, writing to {@code out2}
  48.  * would have the same result as writing to {@code out} directly (provided that the byte sequence is legal with respect to the charset encoding):
  49.  * </p>
  50.  * <p>
  51.  * To build an instance, use {@link Builder}.
  52.  * </p>
  53.  * <pre>
  54.  * OutputStream out = ...
  55.  * Charset cs = ...
  56.  * OutputStreamWriter writer = new OutputStreamWriter(out, cs);
  57.  * WriterOutputStream out2 = WriterOutputStream.builder()
  58.  *   .setWriter(writer)
  59.  *   .setCharset(cs)
  60.  *   .get();
  61.  * </pre>
  62.  * <p>
  63.  * {@link WriterOutputStream} implements the same transformation as {@link InputStreamReader}, except that the control flow is reversed: both classes
  64.  * transform a byte stream into a character stream, but {@link InputStreamReader} pulls data from the underlying stream, while
  65.  * {@link WriterOutputStream} pushes it to the underlying stream.
  66.  * </p>
  67.  * <p>
  68.  * Note that while there are use cases where there is no alternative to using this class, very often the need to use this class is an indication of a flaw in
  69.  * the design of the code. This class is typically used in situations where an existing API only accepts an {@link OutputStream} object, but where the stream is
  70.  * known to represent character data that must be decoded for further use.
  71.  * </p>
  72.  * <p>
  73.  * Instances of {@link WriterOutputStream} are not thread safe.
  74.  * </p>
  75.  *
  76.  * @see Builder
  77.  * @see org.apache.commons.io.input.ReaderInputStream
  78.  * @since 2.0
  79.  */
  80. public class WriterOutputStream extends OutputStream {

  81.     // @formatter:off
  82.     /**
  83.      * Builds a new {@link WriterOutputStream}.
  84.      *
  85.      * <p>
  86.      * For example:
  87.      * </p>
  88.      * <pre>{@code
  89.      * WriterOutputStream s = WriterOutputStream.builder()
  90.      *   .setPath(path)
  91.      *   .setBufferSize(8192)
  92.      *   .setCharset(StandardCharsets.UTF_8)
  93.      *   .setWriteImmediately(false)
  94.      *   .get();}
  95.      * </pre>
  96.      *
  97.      * @see #get()
  98.      * @since 2.12.0
  99.      */
  100.     // @formatter:on
  101.     public static class Builder extends AbstractStreamBuilder<WriterOutputStream, Builder> {

  102.         private CharsetDecoder charsetDecoder;
  103.         private boolean writeImmediately;

  104.         /**
  105.          * Constructs a new Builder.
  106.          */
  107.         public Builder() {
  108.             this.charsetDecoder = getCharset().newDecoder();
  109.         }

  110.         /**
  111.          * Builds a new {@link WriterOutputStream}.
  112.          * <p>
  113.          * You must set input that supports {@link #getWriter()} on this builder, otherwise, this method throws an exception.
  114.          * </p>
  115.          * <p>
  116.          * This builder use the following aspects:
  117.          * </p>
  118.          * <ul>
  119.          * <li>{@link #getWriter()}</li>
  120.          * <li>{@link #getBufferSize()}</li>
  121.          * <li>charsetDecoder</li>
  122.          * <li>writeImmediately</li>
  123.          * </ul>
  124.          *
  125.          * @return a new instance.
  126.          * @throws UnsupportedOperationException if the origin cannot provide a Writer.
  127.          * @see #getWriter()
  128.          */
  129.         @SuppressWarnings("resource")
  130.         @Override
  131.         public WriterOutputStream get() throws IOException {
  132.             return new WriterOutputStream(getWriter(), charsetDecoder, getBufferSize(), writeImmediately);
  133.         }

  134.         @Override
  135.         public Builder setCharset(final Charset charset) {
  136.             super.setCharset(charset);
  137.             this.charsetDecoder = getCharset().newDecoder();
  138.             return this;
  139.         }

  140.         @Override
  141.         public Builder setCharset(final String charset) {
  142.             super.setCharset(charset);
  143.             this.charsetDecoder = getCharset().newDecoder();
  144.             return this;
  145.         }

  146.         /**
  147.          * Sets the charset decoder.
  148.          *
  149.          * @param charsetDecoder the charset decoder.
  150.          * @return {@code this} instance.
  151.          */
  152.         public Builder setCharsetDecoder(final CharsetDecoder charsetDecoder) {
  153.             this.charsetDecoder = charsetDecoder != null ? charsetDecoder : getCharsetDefault().newDecoder();
  154.             super.setCharset(this.charsetDecoder.charset());
  155.             return this;
  156.         }

  157.         /**
  158.          * Sets whether the output buffer will be flushed after each write operation ({@code true}), meaning all available data will be written to the
  159.          * underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when {@link #flush()} or
  160.          * {@link #close()} is called.
  161.          *
  162.          * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, meaning all available data will be written to
  163.          *                         the underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
  164.          *                         {@link #flush()} or {@link #close()} is called.
  165.          * @return {@code this} instance.
  166.          */
  167.         public Builder setWriteImmediately(final boolean writeImmediately) {
  168.             this.writeImmediately = writeImmediately;
  169.             return this;
  170.         }

  171.     }

  172.     private static final int BUFFER_SIZE = IOUtils.DEFAULT_BUFFER_SIZE;

  173.     /**
  174.      * Constructs a new {@link Builder}.
  175.      *
  176.      * @return a new {@link Builder}.
  177.      * @since 2.12.0
  178.      */
  179.     public static Builder builder() {
  180.         return new Builder();
  181.     }

  182.     /**
  183.      * Checks if the JDK in use properly supports the given charset.
  184.      *
  185.      * @param charset the charset to check the support for
  186.      */
  187.     private static void checkIbmJdkWithBrokenUTF16(final Charset charset) {
  188.         if (!StandardCharsets.UTF_16.name().equals(charset.name())) {
  189.             return;
  190.         }
  191.         final String TEST_STRING_2 = "v\u00e9s";
  192.         final byte[] bytes = TEST_STRING_2.getBytes(charset);

  193.         final CharsetDecoder charsetDecoder2 = charset.newDecoder();
  194.         final ByteBuffer bb2 = ByteBuffer.allocate(16);
  195.         final CharBuffer cb2 = CharBuffer.allocate(TEST_STRING_2.length());
  196.         final int len = bytes.length;
  197.         for (int i = 0; i < len; i++) {
  198.             bb2.put(bytes[i]);
  199.             bb2.flip();
  200.             try {
  201.                 charsetDecoder2.decode(bb2, cb2, i == len - 1);
  202.             } catch (final IllegalArgumentException e) {
  203.                 throw new UnsupportedOperationException("UTF-16 requested when running on an IBM JDK with broken UTF-16 support. "
  204.                         + "Please find a JDK that supports UTF-16 if you intend to use UF-16 with WriterOutputStream");
  205.             }
  206.             bb2.compact();
  207.         }
  208.         cb2.rewind();
  209.         if (!TEST_STRING_2.equals(cb2.toString())) {
  210.             throw new UnsupportedOperationException("UTF-16 requested when running on an IBM JDK with broken UTF-16 support. "
  211.                     + "Please find a JDK that supports UTF-16 if you intend to use UF-16 with WriterOutputStream");
  212.         }

  213.     }

  214.     private final Writer writer;
  215.     private final CharsetDecoder decoder;

  216.     private final boolean writeImmediately;

  217.     /**
  218.      * ByteBuffer used as input for the decoder. This buffer can be small as it is used only to transfer the received data to the decoder.
  219.      */
  220.     private final ByteBuffer decoderIn = ByteBuffer.allocate(128);

  221.     /**
  222.      * CharBuffer used as output for the decoder. It should be somewhat larger as we write from this buffer to the underlying Writer.
  223.      */
  224.     private final CharBuffer decoderOut;

  /**
   * Constructs a new {@link WriterOutputStream} that decodes with {@link Charset#defaultCharset()} and buffers up to {@value #BUFFER_SIZE} characters. The
   * output buffer is only flushed when it overflows or when {@link #flush()} or {@link #close()} is called.
   *
   * @param writer the target {@link Writer}
   * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
   */
  @Deprecated
  public WriterOutputStream(final Writer writer) {
    // The (Writer, Charset) overload supplies the same defaults: BUFFER_SIZE characters and no immediate writes.
    this(writer, Charset.defaultCharset());
  }

  /**
   * Constructs a new {@link WriterOutputStream} for the given charset with an output buffer of {@value #BUFFER_SIZE} characters. The output buffer is only
   * flushed when it overflows or when {@link #flush()} or {@link #close()} is called.
   *
   * @param writer  the target {@link Writer}
   * @param charset the charset encoding
   * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
   */
  @Deprecated
  public WriterOutputStream(final Writer writer, final Charset charset) {
    this(writer, charset, BUFFER_SIZE, false);
  }

  248.     /**
  249.      * Constructs a new {@link WriterOutputStream}.
  250.      *
  251.      * @param writer           the target {@link Writer}
  252.      * @param charset          the charset encoding
  253.      * @param bufferSize       the size of the output buffer in number of characters
  254.      * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, meaning all available data will be written to the
  255.      *                         underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
  256.      *                         {@link #flush()} or {@link #close()} is called.
  257.      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
  258.      */
  259.     @Deprecated
  260.     public WriterOutputStream(final Writer writer, final Charset charset, final int bufferSize, final boolean writeImmediately) {
  261.         // @formatter:off
  262.         this(writer,
  263.             Charsets.toCharset(charset).newDecoder()
  264.                     .onMalformedInput(CodingErrorAction.REPLACE)
  265.                     .onUnmappableCharacter(CodingErrorAction.REPLACE)
  266.                     .replaceWith("?"),
  267.              bufferSize,
  268.              writeImmediately);
  269.         // @formatter:on
  270.     }

  /**
   * Constructs a new {@link WriterOutputStream} that decodes with the supplied decoder and buffers up to {@value #BUFFER_SIZE} characters. The output
   * buffer is only flushed when it overflows or when {@link #flush()} or {@link #close()} is called.
   *
   * @param writer  the target {@link Writer}
   * @param decoder the charset decoder
   * @since 2.1
   * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
   */
  @Deprecated
  public WriterOutputStream(final Writer writer, final CharsetDecoder decoder) {
    this(writer, decoder, BUFFER_SIZE, false);
  }

  284.     /**
  285.      * Constructs a new {@link WriterOutputStream}.
  286.      *
  287.      * @param writer           the target {@link Writer}
  288.      * @param decoder          the charset decoder
  289.      * @param bufferSize       the size of the output buffer in number of characters
  290.      * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, meaning all available data will be written to the
  291.      *                         underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
  292.      *                         {@link #flush()} or {@link #close()} is called.
  293.      * @since 2.1
  294.      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
  295.      */
  296.     @Deprecated
  297.     public WriterOutputStream(final Writer writer, final CharsetDecoder decoder, final int bufferSize, final boolean writeImmediately) {
  298.         checkIbmJdkWithBrokenUTF16(CharsetDecoders.toCharsetDecoder(decoder).charset());
  299.         this.writer = writer;
  300.         this.decoder = CharsetDecoders.toCharsetDecoder(decoder);
  301.         this.writeImmediately = writeImmediately;
  302.         this.decoderOut = CharBuffer.allocate(bufferSize);
  303.     }

  /**
   * Constructs a new {@link WriterOutputStream} for the named charset with an output buffer of {@value #BUFFER_SIZE} characters. The output buffer is only
   * flushed when it overflows or when {@link #flush()} or {@link #close()} is called.
   *
   * @param writer      the target {@link Writer}
   * @param charsetName the name of the charset encoding
   * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
   */
  @Deprecated
  public WriterOutputStream(final Writer writer, final String charsetName) {
    // Same name-to-charset resolution that the four-argument String overload performs, applied here directly.
    this(writer, Charsets.toCharset(charsetName), BUFFER_SIZE, false);
  }

  /**
   * Constructs a new {@link WriterOutputStream} for the named charset. The name is resolved with {@code Charsets.toCharset} and the result is handed to the
   * {@link Charset}-based constructor.
   *
   * @param writer           the target {@link Writer}
   * @param charsetName      the name of the charset encoding
   * @param bufferSize       the size of the output buffer in number of characters
   * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, meaning all available data will be written to the
   *                         underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
   *                         {@link #flush()} or {@link #close()} is called.
   * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
   */
  @Deprecated
  public WriterOutputStream(final Writer writer, final String charsetName, final int bufferSize, final boolean writeImmediately) {
    this(writer, Charsets.toCharset(charsetName), bufferSize, writeImmediately);
  }

  331.     /**
  332.      * Close the stream. Any remaining content accumulated in the output buffer will be written to the underlying {@link Writer}. After that
  333.      * {@link Writer#close()} will be called.
  334.      *
  335.      * @throws IOException if an I/O error occurs.
  336.      */
  337.     @Override
  338.     public void close() throws IOException {
  339.         processInput(true);
  340.         flushOutput();
  341.         writer.close();
  342.     }

  343.     /**
  344.      * Flush the stream. Any remaining content accumulated in the output buffer will be written to the underlying {@link Writer}. After that
  345.      * {@link Writer#flush()} will be called.
  346.      *
  347.      * @throws IOException if an I/O error occurs.
  348.      */
  349.     @Override
  350.     public void flush() throws IOException {
  351.         flushOutput();
  352.         writer.flush();
  353.     }

  354.     /**
  355.      * Flush the output.
  356.      *
  357.      * @throws IOException if an I/O error occurs.
  358.      */
  359.     private void flushOutput() throws IOException {
  360.         if (decoderOut.position() > 0) {
  361.             writer.write(decoderOut.array(), 0, decoderOut.position());
  362.             decoderOut.rewind();
  363.         }
  364.     }

  365.     /**
  366.      * Decode the contents of the input ByteBuffer into a CharBuffer.
  367.      *
  368.      * @param endOfInput indicates end of input
  369.      * @throws IOException if an I/O error occurs.
  370.      */
  371.     private void processInput(final boolean endOfInput) throws IOException {
  372.         // Prepare decoderIn for reading
  373.         decoderIn.flip();
  374.         CoderResult coderResult;
  375.         while (true) {
  376.             coderResult = decoder.decode(decoderIn, decoderOut, endOfInput);
  377.             if (coderResult.isOverflow()) {
  378.                 flushOutput();
  379.             } else if (coderResult.isUnderflow()) {
  380.                 break;
  381.             } else {
  382.                 // The decoder is configured to replace malformed input and unmappable characters,
  383.                 // so we should not get here.
  384.                 throw new IOException("Unexpected coder result");
  385.             }
  386.         }
  387.         // Discard the bytes that have been read
  388.         decoderIn.compact();
  389.     }

  390.     /**
  391.      * Write bytes from the specified byte array to the stream.
  392.      *
  393.      * @param b the byte array containing the bytes to write
  394.      * @throws IOException if an I/O error occurs.
  395.      */
  396.     @Override
  397.     public void write(final byte[] b) throws IOException {
  398.         write(b, 0, b.length);
  399.     }

  400.     /**
  401.      * Write bytes from the specified byte array to the stream.
  402.      *
  403.      * @param b   the byte array containing the bytes to write
  404.      * @param off the start offset in the byte array
  405.      * @param len the number of bytes to write
  406.      * @throws IOException if an I/O error occurs.
  407.      */
  408.     @Override
  409.     public void write(final byte[] b, int off, int len) throws IOException {
  410.         while (len > 0) {
  411.             final int c = Math.min(len, decoderIn.remaining());
  412.             decoderIn.put(b, off, c);
  413.             processInput(false);
  414.             len -= c;
  415.             off += c;
  416.         }
  417.         if (writeImmediately) {
  418.             flushOutput();
  419.         }
  420.     }

  421.     /**
  422.      * Write a single byte to the stream.
  423.      *
  424.      * @param b the byte to write
  425.      * @throws IOException if an I/O error occurs.
  426.      */
  427.     @Override
  428.     public void write(final int b) throws IOException {
  429.         write(new byte[] { (byte) b }, 0, 1);
  430.     }
  431. }