/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17  package org.apache.commons.io.output;
18  
19  import java.io.File;
20  import java.io.FileNotFoundException;
21  import java.io.FileOutputStream;
22  import java.io.IOException;
23  import java.io.OutputStream;
24  import java.io.OutputStreamWriter;
25  import java.io.StringWriter;
26  import java.io.Writer;
27  import java.nio.charset.Charset;
28  import java.nio.charset.StandardCharsets;
29  import java.util.Locale;
30  import java.util.Objects;
31  import java.util.regex.Matcher;
32  
33  import org.apache.commons.io.Charsets;
34  import org.apache.commons.io.IOUtils;
35  import org.apache.commons.io.build.AbstractStreamBuilder;
36  import org.apache.commons.io.input.XmlStreamReader;
37  
38  /**
39   * Character stream that handles all the necessary work to figure out the charset encoding of the XML document written to the stream.
40   * <p>
41   * To build an instance, use {@link Builder}.
42   * </p>
43   *
44   * @see Builder
45   * @see XmlStreamReader
46   * @since 2.0
47   */
48  public class XmlStreamWriter extends Writer {
49  
50      // @formatter:off
51      /**
52       * Builds a new {@link XmlStreamWriter}.
53       *
54       * <p>
55       * For example:
56       * </p>
57       * <pre>{@code
58       * WriterOutputStream w = WriterOutputStream.builder()
59       *   .setPath(path)
60       *   .setCharset(StandardCharsets.UTF_8)
61       *   .get();}
62       * </pre>
63       *
64       * @see #get()
65       * @since 2.12.0
66       */
67      // @formatter:off
68      public static class Builder extends AbstractStreamBuilder<XmlStreamWriter, Builder> {
69  
70          /**
71           * Constructs a new builder of {@link XmlStreamWriter}.
72           */
73          public Builder() {
74              setCharsetDefault(StandardCharsets.UTF_8);
75              setCharset(StandardCharsets.UTF_8);
76          }
77  
78          /**
79           * Builds a new {@link XmlStreamWriter}.
80           * <p>
81           * You must set an aspect that supports {@link #getOutputStream()} on this builder, otherwise, this method throws an exception.
82           * </p>
83           * <p>
84           * This builder uses the following aspects:
85           * </p>
86           * <ul>
87           * <li>{@link #getOutputStream()}</li>
88           * <li>{@link #getCharset()}</li>
89           * </ul>
90           *
91           * @return a new instance.
92           * @throws IllegalStateException         if the {@code origin} is {@code null}.
93           * @throws UnsupportedOperationException if the origin cannot be converted to an {@link OutputStream}.
94           * @throws IOException                   if an I/O error occurs converting to an {@link OutputStream} using {@link #getOutputStream()}.
95           * @see #getOutputStream()
96           * @see #getUnchecked()
97           */
98          @Override
99          public XmlStreamWriter get() throws IOException {
100             return new XmlStreamWriter(this);
101         }
102 
103     }
104 
105     private static final int BUFFER_SIZE = IOUtils.DEFAULT_BUFFER_SIZE;
106 
107     /**
108      * Constructs a new {@link Builder}.
109      *
110      * @return a new {@link Builder}.
111      * @since 2.12.0
112      */
113     public static Builder builder() {
114         return new Builder();
115     }
116 
117     private final OutputStream out;
118 
119     private final Charset defaultCharset;
120 
121     private StringWriter prologWriter = new StringWriter(BUFFER_SIZE);
122 
123     private Writer writer;
124 
125     private Charset charset;
126 
127     @SuppressWarnings("resource") // caller closes.
128     private XmlStreamWriter(final Builder builder) throws IOException {
129         this(builder.getOutputStream(), builder.getCharset());
130     }
131 
132     /**
133      * Constructs a new XML stream writer for the specified file
134      * with a default encoding of UTF-8.
135      *
136      * @param file The file to write to
137      * @throws FileNotFoundException if there is an error creating or
138      * opening the file
139      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
140      */
141     @Deprecated
142     public XmlStreamWriter(final File file) throws FileNotFoundException {
143         this(file, null);
144     }
145 
146     /**
147      * Constructs a new XML stream writer for the specified file
148      * with the specified default encoding.
149      *
150      * @param file The file to write to
151      * @param defaultEncoding The default encoding if not encoding could be detected
152      * @throws FileNotFoundException if there is an error creating or
153      * opening the file
154      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
155      */
156     @Deprecated
157     @SuppressWarnings("resource")
158     public XmlStreamWriter(final File file, final String defaultEncoding) throws FileNotFoundException {
159         this(new FileOutputStream(file), defaultEncoding);
160     }
161 
162     /**
163      * Constructs a new XML stream writer for the specified output stream
164      * with a default encoding of UTF-8.
165      *
166      * @param out The output stream
167      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
168      */
169     @Deprecated
170     public XmlStreamWriter(final OutputStream out) {
171         this(out, StandardCharsets.UTF_8);
172     }
173 
174     /**
175      * Constructs a new XML stream writer for the specified output stream
176      * with the specified default encoding.
177      *
178      * @param out The output stream
179      * @param defaultEncoding The default encoding if not encoding could be detected
180      */
181     private XmlStreamWriter(final OutputStream out, final Charset defaultEncoding) {
182         this.out = out;
183         this.defaultCharset = Objects.requireNonNull(defaultEncoding);
184     }
185 
186     /**
187      * Constructs a new XML stream writer for the specified output stream
188      * with the specified default encoding.
189      *
190      * @param out The output stream
191      * @param defaultEncoding The default encoding if not encoding could be detected
192      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
193      */
194     @Deprecated
195     public XmlStreamWriter(final OutputStream out, final String defaultEncoding) {
196         this(out, Charsets.toCharset(defaultEncoding, StandardCharsets.UTF_8));
197     }
198 
199     /**
200      * Closes the underlying writer.
201      *
202      * @throws IOException if an error occurs closing the underlying writer
203      */
204     @Override
205     public void close() throws IOException {
206         if (writer == null) {
207             charset = defaultCharset;
208             writer = new OutputStreamWriter(out, charset);
209             writer.write(prologWriter.toString());
210         }
211         writer.close();
212     }
213 
214     /**
215      * Detects the encoding.
216      *
217      * @param cbuf the buffer to write the characters from
218      * @param off The start offset
219      * @param len The number of characters to write
220      * @throws IOException if an error occurs detecting the encoding
221      */
222     private void detectEncoding(final char[] cbuf, final int off, final int len)
223             throws IOException {
224         int size = len;
225         final StringBuffer xmlProlog = prologWriter.getBuffer();
226         if (xmlProlog.length() + len > BUFFER_SIZE) {
227             size = BUFFER_SIZE - xmlProlog.length();
228         }
229         prologWriter.write(cbuf, off, size);
230 
231         // try to determine encoding
232         if (xmlProlog.length() >= 5) {
233             if (xmlProlog.substring(0, 5).equals("<?xml")) {
234                 // try to extract encoding from XML prolog
235                 final int xmlPrologEnd = xmlProlog.indexOf("?>");
236                 if (xmlPrologEnd > 0) {
237                     // ok, full XML prolog written: let's extract encoding
238                     final Matcher m = XmlStreamReader.ENCODING_PATTERN.matcher(xmlProlog.substring(0,
239                             xmlPrologEnd));
240                     if (m.find()) {
241                         final String encName = m.group(1).toUpperCase(Locale.ROOT);
242                         charset = Charset.forName(encName.substring(1, encName.length() - 1));
243                     } else {
244                         // no encoding found in XML prolog: using default
245                         // encoding
246                         charset = defaultCharset;
247                     }
248                 } else if (xmlProlog.length() >= BUFFER_SIZE) {
249                     // no encoding found in first characters: using default
250                     // encoding
251                     charset = defaultCharset;
252                 }
253             } else {
254                 // no XML prolog: using default encoding
255                 charset = defaultCharset;
256             }
257             if (charset != null) {
258                 // encoding has been chosen: let's do it
259                 prologWriter = null;
260                 writer = new OutputStreamWriter(out, charset);
261                 writer.write(xmlProlog.toString());
262                 if (len > size) {
263                     writer.write(cbuf, off + size, len - size);
264                 }
265             }
266         }
267     }
268 
269     /**
270      * Flushes the underlying writer.
271      *
272      * @throws IOException if an error occurs flushing the underlying writer
273      */
274     @Override
275     public void flush() throws IOException {
276         if (writer != null) {
277             writer.flush();
278         }
279     }
280 
281     /**
282      * Returns the default encoding.
283      *
284      * @return the default encoding
285      */
286     public String getDefaultEncoding() {
287         return defaultCharset.name();
288     }
289 
290     /**
291      * Returns the detected encoding.
292      *
293      * @return the detected encoding
294      */
295     public String getEncoding() {
296         return charset.name();
297     }
298 
299     /**
300      * Writes the characters to the underlying writer, detecting encoding.
301      *
302      * @param cbuf the buffer to write the characters from
303      * @param off The start offset
304      * @param len The number of characters to write
305      * @throws IOException if an error occurs detecting the encoding
306      */
307     @Override
308     public void write(final char[] cbuf, final int off, final int len) throws IOException {
309         if (prologWriter != null) {
310             detectEncoding(cbuf, off, len);
311         } else {
312             writer.write(cbuf, off, len);
313         }
314     }
315 }