View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.io.output;
18  
19  import java.io.File;
20  import java.io.FileNotFoundException;
21  import java.io.FileOutputStream;
22  import java.io.IOException;
23  import java.io.OutputStream;
24  import java.io.OutputStreamWriter;
25  import java.io.StringWriter;
26  import java.io.Writer;
27  import java.nio.charset.Charset;
28  import java.nio.charset.StandardCharsets;
29  import java.util.Locale;
30  import java.util.Objects;
31  import java.util.regex.Matcher;
32  
33  import org.apache.commons.io.Charsets;
34  import org.apache.commons.io.IOUtils;
35  import org.apache.commons.io.build.AbstractStreamBuilder;
36  import org.apache.commons.io.input.XmlStreamReader;
37  
38  /**
39   * Character stream that handles all the necessary work to figure out the charset encoding of the XML document written to the stream.
40   * <p>
41   * To build an instance, use {@link Builder}.
42   * </p>
43   *
44   * @see Builder
45   * @see XmlStreamReader
46   * @since 2.0
47   */
48  public class XmlStreamWriter extends Writer {
49  
50      // @formatter:off
51      /**
52       * Builds a new {@link XmlStreamWriter}.
53       *
54       * <p>
55       * For example:
56       * </p>
57       * <pre>{@code
58       * WriterOutputStream w = WriterOutputStream.builder()
59       *   .setPath(path)
60       *   .setCharset(StandardCharsets.UTF_8)
61       *   .get();}
62       * </pre>
63       *
64       * @see #get()
65       * @since 2.12.0
66       */
67      // @formatter:off
68      public static class Builder extends AbstractStreamBuilder<XmlStreamWriter, Builder> {
69  
70          /**
71           * Constructs a new {@link Builder}.
72           */
73          public Builder() {
74              setCharsetDefault(StandardCharsets.UTF_8);
75              setCharset(StandardCharsets.UTF_8);
76          }
77  
78          /**
79           * Builds a new {@link XmlStreamWriter}.
80           * <p>
81           * You must set input that supports {@link #getOutputStream()} on this builder, otherwise, this method throws an exception.
82           * </p>
83           * <p>
84           * This builder use the following aspects:
85           * </p>
86           * <ul>
87           * <li>{@link #getOutputStream()}</li>
88           * <li>{@link #getCharset()}</li>
89           * </ul>
90           *
91           * @return a new instance.
92           * @throws IllegalStateException         if the {@code origin} is {@code null}.
93           * @throws UnsupportedOperationException if the origin cannot be converted to an {@link OutputStream}.
94           * @throws IOException                   if an I/O error occurs.
95           * @see #getOutputStream()
96           */
97          @SuppressWarnings("resource")
98          @Override
99          public XmlStreamWriter get() throws IOException {
100             return new XmlStreamWriter(getOutputStream(), getCharset());
101         }
102 
103     }
104 
105     private static final int BUFFER_SIZE = IOUtils.DEFAULT_BUFFER_SIZE;
106 
107     /**
108      * Constructs a new {@link Builder}.
109      *
110      * @return a new {@link Builder}.
111      * @since 2.12.0
112      */
113     public static Builder builder() {
114         return new Builder();
115     }
116 
117     private final OutputStream out;
118 
119     private final Charset defaultCharset;
120 
121     private StringWriter prologWriter = new StringWriter(BUFFER_SIZE);
122 
123     private Writer writer;
124 
125     private Charset charset;
126 
127     /**
128      * Constructs a new XML stream writer for the specified file
129      * with a default encoding of UTF-8.
130      *
131      * @param file The file to write to
132      * @throws FileNotFoundException if there is an error creating or
133      * opening the file
134      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
135      */
136     @Deprecated
137     public XmlStreamWriter(final File file) throws FileNotFoundException {
138         this(file, null);
139     }
140 
141     /**
142      * Constructs a new XML stream writer for the specified file
143      * with the specified default encoding.
144      *
145      * @param file The file to write to
146      * @param defaultEncoding The default encoding if not encoding could be detected
147      * @throws FileNotFoundException if there is an error creating or
148      * opening the file
149      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
150      */
151     @Deprecated
152     @SuppressWarnings("resource")
153     public XmlStreamWriter(final File file, final String defaultEncoding) throws FileNotFoundException {
154         this(new FileOutputStream(file), defaultEncoding);
155     }
156 
157     /**
158      * Constructs a new XML stream writer for the specified output stream
159      * with a default encoding of UTF-8.
160      *
161      * @param out The output stream
162      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
163      */
164     @Deprecated
165     public XmlStreamWriter(final OutputStream out) {
166         this(out, StandardCharsets.UTF_8);
167     }
168 
169     /**
170      * Constructs a new XML stream writer for the specified output stream
171      * with the specified default encoding.
172      *
173      * @param out The output stream
174      * @param defaultEncoding The default encoding if not encoding could be detected
175      */
176     private XmlStreamWriter(final OutputStream out, final Charset defaultEncoding) {
177         this.out = out;
178         this.defaultCharset = Objects.requireNonNull(defaultEncoding);
179     }
180 
181     /**
182      * Constructs a new XML stream writer for the specified output stream
183      * with the specified default encoding.
184      *
185      * @param out The output stream
186      * @param defaultEncoding The default encoding if not encoding could be detected
187      * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
188      */
189     @Deprecated
190     public XmlStreamWriter(final OutputStream out, final String defaultEncoding) {
191         this(out, Charsets.toCharset(defaultEncoding, StandardCharsets.UTF_8));
192     }
193 
194     /**
195      * Closes the underlying writer.
196      *
197      * @throws IOException if an error occurs closing the underlying writer
198      */
199     @Override
200     public void close() throws IOException {
201         if (writer == null) {
202             charset = defaultCharset;
203             writer = new OutputStreamWriter(out, charset);
204             writer.write(prologWriter.toString());
205         }
206         writer.close();
207     }
208 
209     /**
210      * Detects the encoding.
211      *
212      * @param cbuf the buffer to write the characters from
213      * @param off The start offset
214      * @param len The number of characters to write
215      * @throws IOException if an error occurs detecting the encoding
216      */
217     private void detectEncoding(final char[] cbuf, final int off, final int len)
218             throws IOException {
219         int size = len;
220         final StringBuffer xmlProlog = prologWriter.getBuffer();
221         if (xmlProlog.length() + len > BUFFER_SIZE) {
222             size = BUFFER_SIZE - xmlProlog.length();
223         }
224         prologWriter.write(cbuf, off, size);
225 
226         // try to determine encoding
227         if (xmlProlog.length() >= 5) {
228             if (xmlProlog.substring(0, 5).equals("<?xml")) {
229                 // try to extract encoding from XML prolog
230                 final int xmlPrologEnd = xmlProlog.indexOf("?>");
231                 if (xmlPrologEnd > 0) {
232                     // ok, full XML prolog written: let's extract encoding
233                     final Matcher m = XmlStreamReader.ENCODING_PATTERN.matcher(xmlProlog.substring(0,
234                             xmlPrologEnd));
235                     if (m.find()) {
236                         final String encName = m.group(1).toUpperCase(Locale.ROOT);
237                         charset = Charset.forName(encName.substring(1, encName.length() - 1));
238                     } else {
239                         // no encoding found in XML prolog: using default
240                         // encoding
241                         charset = defaultCharset;
242                     }
243                 } else if (xmlProlog.length() >= BUFFER_SIZE) {
244                     // no encoding found in first characters: using default
245                     // encoding
246                     charset = defaultCharset;
247                 }
248             } else {
249                 // no XML prolog: using default encoding
250                 charset = defaultCharset;
251             }
252             if (charset != null) {
253                 // encoding has been chosen: let's do it
254                 prologWriter = null;
255                 writer = new OutputStreamWriter(out, charset);
256                 writer.write(xmlProlog.toString());
257                 if (len > size) {
258                     writer.write(cbuf, off + size, len - size);
259                 }
260             }
261         }
262     }
263 
264     /**
265      * Flushes the underlying writer.
266      *
267      * @throws IOException if an error occurs flushing the underlying writer
268      */
269     @Override
270     public void flush() throws IOException {
271         if (writer != null) {
272             writer.flush();
273         }
274     }
275 
276     /**
277      * Returns the default encoding.
278      *
279      * @return the default encoding
280      */
281     public String getDefaultEncoding() {
282         return defaultCharset.name();
283     }
284 
285     /**
286      * Returns the detected encoding.
287      *
288      * @return the detected encoding
289      */
290     public String getEncoding() {
291         return charset.name();
292     }
293 
294     /**
295      * Writes the characters to the underlying writer, detecting encoding.
296      *
297      * @param cbuf the buffer to write the characters from
298      * @param off The start offset
299      * @param len The number of characters to write
300      * @throws IOException if an error occurs detecting the encoding
301      */
302     @Override
303     public void write(final char[] cbuf, final int off, final int len) throws IOException {
304         if (prologWriter != null) {
305             detectEncoding(cbuf, off, len);
306         } else {
307             writer.write(cbuf, off, len);
308         }
309     }
310 }