View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.io.output;
18  
19  import java.io.File;
20  import java.io.FileNotFoundException;
21  import java.io.FileOutputStream;
22  import java.io.IOException;
23  import java.io.OutputStream;
24  import java.io.OutputStreamWriter;
25  import java.io.StringWriter;
26  import java.io.Writer;
27  import java.util.Locale;
28  import java.util.regex.Matcher;
29  import java.util.regex.Pattern;
30  
31  import org.apache.commons.io.IOUtils;
32  import org.apache.commons.io.input.XmlStreamReader;
33  
34  /**
35   * Character stream that handles all the necessary Voodoo to figure out the
36   * charset encoding of the XML document written to the stream.
37   *
38   * @see XmlStreamReader
39   * @since 2.0
40   */
41  public class XmlStreamWriter extends Writer {
42      private static final int BUFFER_SIZE = IOUtils.DEFAULT_BUFFER_SIZE;
43  
44      private final OutputStream out;
45  
46      private final String defaultEncoding;
47  
48      private StringWriter xmlPrologWriter = new StringWriter(BUFFER_SIZE);
49  
50      private Writer writer;
51  
52      private String encoding;
53  
54      /**
55       * Constructs a new XML stream writer for the specified output stream
56       * with a default encoding of UTF-8.
57       *
58       * @param out The output stream
59       */
60      public XmlStreamWriter(final OutputStream out) {
61          this(out, null);
62      }
63  
64      /**
65       * Constructs a new XML stream writer for the specified output stream
66       * with the specified default encoding.
67       *
68       * @param out The output stream
69       * @param defaultEncoding The default encoding if not encoding could be detected
70       */
71      public XmlStreamWriter(final OutputStream out, final String defaultEncoding) {
72          this.out = out;
73          this.defaultEncoding = defaultEncoding != null ? defaultEncoding : "UTF-8";
74      }
75  
76      /**
77       * Constructs a new XML stream writer for the specified file
78       * with a default encoding of UTF-8.
79       *
80       * @param file The file to write to
81       * @throws FileNotFoundException if there is an error creating or
82       * opening the file
83       */
84      public XmlStreamWriter(final File file) throws FileNotFoundException {
85          this(file, null);
86      }
87  
88      /**
89       * Constructs a new XML stream writer for the specified file
90       * with the specified default encoding.
91       *
92       * @param file The file to write to
93       * @param defaultEncoding The default encoding if not encoding could be detected
94       * @throws FileNotFoundException if there is an error creating or
95       * opening the file
96       */
97      @SuppressWarnings("resource")
98      public XmlStreamWriter(final File file, final String defaultEncoding) throws FileNotFoundException {
99          this(new FileOutputStream(file), defaultEncoding);
100     }
101 
102     /**
103      * Returns the detected encoding.
104      *
105      * @return the detected encoding
106      */
107     public String getEncoding() {
108         return encoding;
109     }
110 
111     /**
112      * Returns the default encoding.
113      *
114      * @return the default encoding
115      */
116     public String getDefaultEncoding() {
117         return defaultEncoding;
118     }
119 
120     /**
121      * Closes the underlying writer.
122      *
123      * @throws IOException if an error occurs closing the underlying writer
124      */
125     @Override
126     public void close() throws IOException {
127         if (writer == null) {
128             encoding = defaultEncoding;
129             writer = new OutputStreamWriter(out, encoding);
130             writer.write(xmlPrologWriter.toString());
131         }
132         writer.close();
133     }
134 
135     /**
136      * Flushes the underlying writer.
137      *
138      * @throws IOException if an error occurs flushing the underlying writer
139      */
140     @Override
141     public void flush() throws IOException {
142         if (writer != null) {
143             writer.flush();
144         }
145     }
146 
147     /**
148      * Detects the encoding.
149      *
150      * @param cbuf the buffer to write the characters from
151      * @param off The start offset
152      * @param len The number of characters to write
153      * @throws IOException if an error occurs detecting the encoding
154      */
155     private void detectEncoding(final char[] cbuf, final int off, final int len)
156             throws IOException {
157         int size = len;
158         final StringBuffer xmlProlog = xmlPrologWriter.getBuffer();
159         if (xmlProlog.length() + len > BUFFER_SIZE) {
160             size = BUFFER_SIZE - xmlProlog.length();
161         }
162         xmlPrologWriter.write(cbuf, off, size);
163 
164         // try to determine encoding
165         if (xmlProlog.length() >= 5) {
166             if (xmlProlog.substring(0, 5).equals("<?xml")) {
167                 // try to extract encoding from XML prolog
168                 final int xmlPrologEnd = xmlProlog.indexOf("?>");
169                 if (xmlPrologEnd > 0) {
170                     // ok, full XML prolog written: let's extract encoding
171                     final Matcher m = ENCODING_PATTERN.matcher(xmlProlog.substring(0,
172                             xmlPrologEnd));
173                     if (m.find()) {
174                         encoding = m.group(1).toUpperCase(Locale.ROOT);
175                         encoding = encoding.substring(1, encoding.length() - 1);
176                     } else {
177                         // no encoding found in XML prolog: using default
178                         // encoding
179                         encoding = defaultEncoding;
180                     }
181                 } else if (xmlProlog.length() >= BUFFER_SIZE) {
182                     // no encoding found in first characters: using default
183                     // encoding
184                     encoding = defaultEncoding;
185                 }
186             } else {
187                 // no XML prolog: using default encoding
188                 encoding = defaultEncoding;
189             }
190             if (encoding != null) {
191                 // encoding has been chosen: let's do it
192                 xmlPrologWriter = null;
193                 writer = new OutputStreamWriter(out, encoding);
194                 writer.write(xmlProlog.toString());
195                 if (len > size) {
196                     writer.write(cbuf, off + size, len - size);
197                 }
198             }
199         }
200     }
201 
202     /**
203      * Writes the characters to the underlying writer, detecting encoding.
204      *
205      * @param cbuf the buffer to write the characters from
206      * @param off The start offset
207      * @param len The number of characters to write
208      * @throws IOException if an error occurs detecting the encoding
209      */
210     @Override
211     public void write(final char[] cbuf, final int off, final int len) throws IOException {
212         if (xmlPrologWriter != null) {
213             detectEncoding(cbuf, off, len);
214         } else {
215             writer.write(cbuf, off, len);
216         }
217     }
218 
219     static final Pattern ENCODING_PATTERN = XmlStreamReader.ENCODING_PATTERN;
220 }