XmlStreamWriter.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.apache.commons.io.output;

  18. import java.io.File;
  19. import java.io.FileNotFoundException;
  20. import java.io.FileOutputStream;
  21. import java.io.IOException;
  22. import java.io.OutputStream;
  23. import java.io.OutputStreamWriter;
  24. import java.io.StringWriter;
  25. import java.io.Writer;
  26. import java.nio.charset.Charset;
  27. import java.nio.charset.StandardCharsets;
  28. import java.util.Locale;
  29. import java.util.Objects;
  30. import java.util.regex.Matcher;

  31. import org.apache.commons.io.Charsets;
  32. import org.apache.commons.io.IOUtils;
  33. import org.apache.commons.io.build.AbstractStreamBuilder;
  34. import org.apache.commons.io.input.XmlStreamReader;

  35. /**
  36.  * Character stream that handles all the necessary work to figure out the charset encoding of the XML document written to the stream.
  37.  * <p>
  38.  * To build an instance, use {@link Builder}.
  39.  * </p>
  40.  *
  41.  * @see Builder
  42.  * @see XmlStreamReader
  43.  * @since 2.0
  44.  */
  45. public class XmlStreamWriter extends Writer {

  46.     // @formatter:off
  47.     /**
  48.      * Builds a new {@link XmlStreamWriter}.
  49.      *
  50.      * <p>
  51.      * For example:
  52.      * </p>
  53.      * <pre>{@code
  54.      * WriterOutputStream w = WriterOutputStream.builder()
  55.      *   .setPath(path)
  56.      *   .setCharset(StandardCharsets.UTF_8)
  57.      *   .get();}
  58.      * </pre>
  59.      *
  60.      * @see #get()
  61.      * @since 2.12.0
  62.      */
  63.     // @formatter:off
  64.     public static class Builder extends AbstractStreamBuilder<XmlStreamWriter, Builder> {

  65.         /**
  66.          * Constructs a new builder of {@link XmlStreamWriter}.
  67.          */
  68.         public Builder() {
  69.             setCharsetDefault(StandardCharsets.UTF_8);
  70.             setCharset(StandardCharsets.UTF_8);
  71.         }

  72.         /**
  73.          * Builds a new {@link XmlStreamWriter}.
  74.          * <p>
  75.          * You must set an aspect that supports {@link #getOutputStream()} on this builder, otherwise, this method throws an exception.
  76.          * </p>
  77.          * <p>
  78.          * This builder uses the following aspects:
  79.          * </p>
  80.          * <ul>
  81.          * <li>{@link #getOutputStream()}</li>
  82.          * <li>{@link #getCharset()}</li>
  83.          * </ul>
  84.          *
  85.          * @return a new instance.
  86.          * @throws IllegalStateException         if the {@code origin} is {@code null}.
  87.          * @throws UnsupportedOperationException if the origin cannot be converted to an {@link OutputStream}.
  88.          * @throws IOException                   if an I/O error occurs converting to an {@link OutputStream} using {@link #getOutputStream()}.
  89.          * @see #getOutputStream()
  90.          * @see #getUnchecked()
  91.          */
  92.         @Override
  93.         public XmlStreamWriter get() throws IOException {
  94.             return new XmlStreamWriter(getOutputStream(), getCharset());
  95.         }

  96.     }

  97.     private static final int BUFFER_SIZE = IOUtils.DEFAULT_BUFFER_SIZE;

  98.     /**
  99.      * Constructs a new {@link Builder}.
  100.      *
  101.      * @return a new {@link Builder}.
  102.      * @since 2.12.0
  103.      */
  104.     public static Builder builder() {
  105.         return new Builder();
  106.     }

  107.     private final OutputStream out;

  108.     private final Charset defaultCharset;

  109.     private StringWriter prologWriter = new StringWriter(BUFFER_SIZE);

  110.     private Writer writer;

  111.     private Charset charset;

  112.     /**
  113.      * Constructs a new XML stream writer for the specified file
  114.      * with a default encoding of UTF-8.
  115.      *
  116.      * @param file The file to write to
  117.      * @throws FileNotFoundException if there is an error creating or
  118.      * opening the file
  119.      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
  120.      */
  121.     @Deprecated
  122.     public XmlStreamWriter(final File file) throws FileNotFoundException {
  123.         this(file, null);
  124.     }

  125.     /**
  126.      * Constructs a new XML stream writer for the specified file
  127.      * with the specified default encoding.
  128.      *
  129.      * @param file The file to write to
  130.      * @param defaultEncoding The default encoding if not encoding could be detected
  131.      * @throws FileNotFoundException if there is an error creating or
  132.      * opening the file
  133.      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
  134.      */
  135.     @Deprecated
  136.     @SuppressWarnings("resource")
  137.     public XmlStreamWriter(final File file, final String defaultEncoding) throws FileNotFoundException {
  138.         this(new FileOutputStream(file), defaultEncoding);
  139.     }

  140.     /**
  141.      * Constructs a new XML stream writer for the specified output stream
  142.      * with a default encoding of UTF-8.
  143.      *
  144.      * @param out The output stream
  145.      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
  146.      */
  147.     @Deprecated
  148.     public XmlStreamWriter(final OutputStream out) {
  149.         this(out, StandardCharsets.UTF_8);
  150.     }

  151.     /**
  152.      * Constructs a new XML stream writer for the specified output stream
  153.      * with the specified default encoding.
  154.      *
  155.      * @param out The output stream
  156.      * @param defaultEncoding The default encoding if not encoding could be detected
  157.      */
  158.     private XmlStreamWriter(final OutputStream out, final Charset defaultEncoding) {
  159.         this.out = out;
  160.         this.defaultCharset = Objects.requireNonNull(defaultEncoding);
  161.     }

  162.     /**
  163.      * Constructs a new XML stream writer for the specified output stream
  164.      * with the specified default encoding.
  165.      *
  166.      * @param out The output stream
  167.      * @param defaultEncoding The default encoding if not encoding could be detected
  168.      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
  169.      */
  170.     @Deprecated
  171.     public XmlStreamWriter(final OutputStream out, final String defaultEncoding) {
  172.         this(out, Charsets.toCharset(defaultEncoding, StandardCharsets.UTF_8));
  173.     }

  174.     /**
  175.      * Closes the underlying writer.
  176.      *
  177.      * @throws IOException if an error occurs closing the underlying writer
  178.      */
  179.     @Override
  180.     public void close() throws IOException {
  181.         if (writer == null) {
  182.             charset = defaultCharset;
  183.             writer = new OutputStreamWriter(out, charset);
  184.             writer.write(prologWriter.toString());
  185.         }
  186.         writer.close();
  187.     }

  188.     /**
  189.      * Detects the encoding.
  190.      *
  191.      * @param cbuf the buffer to write the characters from
  192.      * @param off The start offset
  193.      * @param len The number of characters to write
  194.      * @throws IOException if an error occurs detecting the encoding
  195.      */
  196.     private void detectEncoding(final char[] cbuf, final int off, final int len)
  197.             throws IOException {
  198.         int size = len;
  199.         final StringBuffer xmlProlog = prologWriter.getBuffer();
  200.         if (xmlProlog.length() + len > BUFFER_SIZE) {
  201.             size = BUFFER_SIZE - xmlProlog.length();
  202.         }
  203.         prologWriter.write(cbuf, off, size);

  204.         // try to determine encoding
  205.         if (xmlProlog.length() >= 5) {
  206.             if (xmlProlog.substring(0, 5).equals("<?xml")) {
  207.                 // try to extract encoding from XML prolog
  208.                 final int xmlPrologEnd = xmlProlog.indexOf("?>");
  209.                 if (xmlPrologEnd > 0) {
  210.                     // ok, full XML prolog written: let's extract encoding
  211.                     final Matcher m = XmlStreamReader.ENCODING_PATTERN.matcher(xmlProlog.substring(0,
  212.                             xmlPrologEnd));
  213.                     if (m.find()) {
  214.                         final String encName = m.group(1).toUpperCase(Locale.ROOT);
  215.                         charset = Charset.forName(encName.substring(1, encName.length() - 1));
  216.                     } else {
  217.                         // no encoding found in XML prolog: using default
  218.                         // encoding
  219.                         charset = defaultCharset;
  220.                     }
  221.                 } else if (xmlProlog.length() >= BUFFER_SIZE) {
  222.                     // no encoding found in first characters: using default
  223.                     // encoding
  224.                     charset = defaultCharset;
  225.                 }
  226.             } else {
  227.                 // no XML prolog: using default encoding
  228.                 charset = defaultCharset;
  229.             }
  230.             if (charset != null) {
  231.                 // encoding has been chosen: let's do it
  232.                 prologWriter = null;
  233.                 writer = new OutputStreamWriter(out, charset);
  234.                 writer.write(xmlProlog.toString());
  235.                 if (len > size) {
  236.                     writer.write(cbuf, off + size, len - size);
  237.                 }
  238.             }
  239.         }
  240.     }

  241.     /**
  242.      * Flushes the underlying writer.
  243.      *
  244.      * @throws IOException if an error occurs flushing the underlying writer
  245.      */
  246.     @Override
  247.     public void flush() throws IOException {
  248.         if (writer != null) {
  249.             writer.flush();
  250.         }
  251.     }

  252.     /**
  253.      * Returns the default encoding.
  254.      *
  255.      * @return the default encoding
  256.      */
  257.     public String getDefaultEncoding() {
  258.         return defaultCharset.name();
  259.     }

  260.     /**
  261.      * Returns the detected encoding.
  262.      *
  263.      * @return the detected encoding
  264.      */
  265.     public String getEncoding() {
  266.         return charset.name();
  267.     }

  268.     /**
  269.      * Writes the characters to the underlying writer, detecting encoding.
  270.      *
  271.      * @param cbuf the buffer to write the characters from
  272.      * @param off The start offset
  273.      * @param len The number of characters to write
  274.      * @throws IOException if an error occurs detecting the encoding
  275.      */
  276.     @Override
  277.     public void write(final char[] cbuf, final int off, final int len) throws IOException {
  278.         if (prologWriter != null) {
  279.             detectEncoding(cbuf, off, len);
  280.         } else {
  281.             writer.write(cbuf, off, len);
  282.         }
  283.     }
  284. }