View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.io.output;
18  
19  import java.io.File;
20  import java.io.FileNotFoundException;
21  import java.io.FileOutputStream;
22  import java.io.IOException;
23  import java.io.OutputStream;
24  import java.io.OutputStreamWriter;
25  import java.io.StringWriter;
26  import java.io.Writer;
27  import java.nio.charset.Charset;
28  import java.nio.charset.StandardCharsets;
29  import java.util.Locale;
30  import java.util.Objects;
31  import java.util.regex.Matcher;
32  
33  import org.apache.commons.io.Charsets;
34  import org.apache.commons.io.IOUtils;
35  import org.apache.commons.io.build.AbstractStreamBuilder;
36  import org.apache.commons.io.input.XmlStreamReader;
37  
38  /**
39   * Character stream that handles all the necessary work to figure out the charset encoding of the XML document written to the stream.
40   * <p>
41   * To build an instance, see {@link Builder}.
42   * </p>
43   *
44   * @see XmlStreamReader
45   * @since 2.0
46   */
47  public class XmlStreamWriter extends Writer {
48  
49      /**
50       * Builds a new {@link XmlStreamWriter} instance.
51       * <p>
52       * For example:
53       * </p>
54       * <pre>{@code
55       * WriterOutputStream w = WriterOutputStream.builder()
56       *   .setPath(path)
57       *   .setCharset(StandardCharsets.UTF_8)
58       *   .get();}
59       * </pre>
60       *
61       * @since 2.12.0
62       */
63      public static class Builder extends AbstractStreamBuilder<XmlStreamWriter, Builder> {
64  
65          /**
66           * Constructs a new Builder.
67           */
68          public Builder() {
69              setCharsetDefault(StandardCharsets.UTF_8);
70              setCharset(StandardCharsets.UTF_8);
71          }
72  
73          /**
74           * Constructs a new instance.
75           * <p>
76           * This builder use the aspect OutputStream, OpenOption[], and Charset.
77           * </p>
78           * <p>
79           * You must provide an origin that can be converted to an OutputStream by this builder, otherwise, this call will throw an
80           * {@link UnsupportedOperationException}.
81           * </p>
82           *
83           * @return a new instance.
84           * @throws UnsupportedOperationException if the origin cannot provide an OutputStream.
85           * @throws IOException                   if an I/O error occurs.
86           * @see #getOutputStream()
87           */
88          @SuppressWarnings("resource")
89          @Override
90          public XmlStreamWriter get() throws IOException {
91              return new XmlStreamWriter(getOutputStream(), getCharset());
92          }
93  
94      }
95  
96      private static final int BUFFER_SIZE = IOUtils.DEFAULT_BUFFER_SIZE;
97  
98      /**
99       * Constructs a new {@link Builder}.
100      *
101      * @return a new {@link Builder}.
102      * @since 2.12.0
103      */
104     public static Builder builder() {
105         return new Builder();
106     }
107 
108     private final OutputStream out;
109 
110     private final Charset defaultCharset;
111 
112     private StringWriter prologWriter = new StringWriter(BUFFER_SIZE);
113 
114     private Writer writer;
115 
116     private Charset charset;
117 
118     /**
119      * Constructs a new XML stream writer for the specified file
120      * with a default encoding of UTF-8.
121      *
122      * @param file The file to write to
123      * @throws FileNotFoundException if there is an error creating or
124      * opening the file
125      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
126      */
127     @Deprecated
128     public XmlStreamWriter(final File file) throws FileNotFoundException {
129         this(file, null);
130     }
131 
132     /**
133      * Constructs a new XML stream writer for the specified file
134      * with the specified default encoding.
135      *
136      * @param file The file to write to
137      * @param defaultEncoding The default encoding if not encoding could be detected
138      * @throws FileNotFoundException if there is an error creating or
139      * opening the file
140      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
141      */
142     @Deprecated
143     @SuppressWarnings("resource")
144     public XmlStreamWriter(final File file, final String defaultEncoding) throws FileNotFoundException {
145         this(new FileOutputStream(file), defaultEncoding);
146     }
147 
148     /**
149      * Constructs a new XML stream writer for the specified output stream
150      * with a default encoding of UTF-8.
151      *
152      * @param out The output stream
153      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
154      */
155     @Deprecated
156     public XmlStreamWriter(final OutputStream out) {
157         this(out, StandardCharsets.UTF_8);
158     }
159 
160     /**
161      * Constructs a new XML stream writer for the specified output stream
162      * with the specified default encoding.
163      *
164      * @param out The output stream
165      * @param defaultEncoding The default encoding if not encoding could be detected
166      */
167     private XmlStreamWriter(final OutputStream out, final Charset defaultEncoding) {
168         this.out = out;
169         this.defaultCharset = Objects.requireNonNull(defaultEncoding);
170     }
171 
172     /**
173      * Constructs a new XML stream writer for the specified output stream
174      * with the specified default encoding.
175      *
176      * @param out The output stream
177      * @param defaultEncoding The default encoding if not encoding could be detected
178      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
179      */
180     @Deprecated
181     public XmlStreamWriter(final OutputStream out, final String defaultEncoding) {
182         this(out, Charsets.toCharset(defaultEncoding, StandardCharsets.UTF_8));
183     }
184 
185     /**
186      * Closes the underlying writer.
187      *
188      * @throws IOException if an error occurs closing the underlying writer
189      */
190     @Override
191     public void close() throws IOException {
192         if (writer == null) {
193             charset = defaultCharset;
194             writer = new OutputStreamWriter(out, charset);
195             writer.write(prologWriter.toString());
196         }
197         writer.close();
198     }
199 
200     /**
201      * Detects the encoding.
202      *
203      * @param cbuf the buffer to write the characters from
204      * @param off The start offset
205      * @param len The number of characters to write
206      * @throws IOException if an error occurs detecting the encoding
207      */
208     private void detectEncoding(final char[] cbuf, final int off, final int len)
209             throws IOException {
210         int size = len;
211         final StringBuffer xmlProlog = prologWriter.getBuffer();
212         if (xmlProlog.length() + len > BUFFER_SIZE) {
213             size = BUFFER_SIZE - xmlProlog.length();
214         }
215         prologWriter.write(cbuf, off, size);
216 
217         // try to determine encoding
218         if (xmlProlog.length() >= 5) {
219             if (xmlProlog.substring(0, 5).equals("<?xml")) {
220                 // try to extract encoding from XML prolog
221                 final int xmlPrologEnd = xmlProlog.indexOf("?>");
222                 if (xmlPrologEnd > 0) {
223                     // ok, full XML prolog written: let's extract encoding
224                     final Matcher m = XmlStreamReader.ENCODING_PATTERN.matcher(xmlProlog.substring(0,
225                             xmlPrologEnd));
226                     if (m.find()) {
227                         final String encName = m.group(1).toUpperCase(Locale.ROOT);
228                         charset = Charset.forName(encName.substring(1, encName.length() - 1));
229                     } else {
230                         // no encoding found in XML prolog: using default
231                         // encoding
232                         charset = defaultCharset;
233                     }
234                 } else if (xmlProlog.length() >= BUFFER_SIZE) {
235                     // no encoding found in first characters: using default
236                     // encoding
237                     charset = defaultCharset;
238                 }
239             } else {
240                 // no XML prolog: using default encoding
241                 charset = defaultCharset;
242             }
243             if (charset != null) {
244                 // encoding has been chosen: let's do it
245                 prologWriter = null;
246                 writer = new OutputStreamWriter(out, charset);
247                 writer.write(xmlProlog.toString());
248                 if (len > size) {
249                     writer.write(cbuf, off + size, len - size);
250                 }
251             }
252         }
253     }
254 
255     /**
256      * Flushes the underlying writer.
257      *
258      * @throws IOException if an error occurs flushing the underlying writer
259      */
260     @Override
261     public void flush() throws IOException {
262         if (writer != null) {
263             writer.flush();
264         }
265     }
266 
267     /**
268      * Returns the default encoding.
269      *
270      * @return the default encoding
271      */
272     public String getDefaultEncoding() {
273         return defaultCharset.name();
274     }
275 
276     /**
277      * Returns the detected encoding.
278      *
279      * @return the detected encoding
280      */
281     public String getEncoding() {
282         return charset.name();
283     }
284 
285     /**
286      * Writes the characters to the underlying writer, detecting encoding.
287      *
288      * @param cbuf the buffer to write the characters from
289      * @param off The start offset
290      * @param len The number of characters to write
291      * @throws IOException if an error occurs detecting the encoding
292      */
293     @Override
294     public void write(final char[] cbuf, final int off, final int len) throws IOException {
295         if (prologWriter != null) {
296             detectEncoding(cbuf, off, len);
297         } else {
298             writer.write(cbuf, off, len);
299         }
300     }
301 }