View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.io.output;
18  
19  import java.io.File;
20  import java.io.FileNotFoundException;
21  import java.io.FileOutputStream;
22  import java.io.IOException;
23  import java.io.OutputStream;
24  import java.io.OutputStreamWriter;
25  import java.io.StringWriter;
26  import java.io.Writer;
27  import java.util.regex.Matcher;
28  import java.util.regex.Pattern;
29  
30  import org.apache.commons.io.input.XmlStreamReader;
31  
32  /**
33   * Character stream that handles all the necessary Voodoo to figure out the
34   * charset encoding of the XML document written to the stream.
35   *
36   * @see XmlStreamReader
37   * @since 2.0
38   */
39  public class XmlStreamWriter extends Writer {
40      private static final int BUFFER_SIZE = 4096;
41  
42      private final OutputStream out;
43  
44      private final String defaultEncoding;
45  
46      private StringWriter xmlPrologWriter = new StringWriter(BUFFER_SIZE);
47  
48      private Writer writer;
49  
50      private String encoding;
51  
52      /**
53       * Constructs a new XML stream writer for the specified output stream
54       * with a default encoding of UTF-8.
55       *
56       * @param out The output stream
57       */
58      public XmlStreamWriter(final OutputStream out) {
59          this(out, null);
60      }
61  
62      /**
63       * Constructs a new XML stream writer for the specified output stream
64       * with the specified default encoding.
65       *
66       * @param out The output stream
67       * @param defaultEncoding The default encoding if not encoding could be detected
68       */
69      public XmlStreamWriter(final OutputStream out, final String defaultEncoding) {
70          this.out = out;
71          this.defaultEncoding = defaultEncoding != null ? defaultEncoding : "UTF-8";
72      }
73  
74      /**
75       * Constructs a new XML stream writer for the specified file
76       * with a default encoding of UTF-8.
77       *
78       * @param file The file to write to
79       * @throws FileNotFoundException if there is an error creating or
80       * opening the file
81       */
82      public XmlStreamWriter(final File file) throws FileNotFoundException {
83          this(file, null);
84      }
85  
86      /**
87       * Constructs a new XML stream writer for the specified file
88       * with the specified default encoding.
89       *
90       * @param file The file to write to
91       * @param defaultEncoding The default encoding if not encoding could be detected
92       * @throws FileNotFoundException if there is an error creating or
93       * opening the file
94       */
95      public XmlStreamWriter(final File file, final String defaultEncoding) throws FileNotFoundException {
96          this(new FileOutputStream(file), defaultEncoding);
97      }
98  
99      /**
100      * Returns the detected encoding.
101      *
102      * @return the detected encoding
103      */
104     public String getEncoding() {
105         return encoding;
106     }
107 
108     /**
109      * Returns the default encoding.
110      *
111      * @return the default encoding
112      */
113     public String getDefaultEncoding() {
114         return defaultEncoding;
115     }
116 
117     /**
118      * Closes the underlying writer.
119      *
120      * @throws IOException if an error occurs closing the underlying writer
121      */
122     @Override
123     public void close() throws IOException {
124         if (writer == null) {
125             encoding = defaultEncoding;
126             writer = new OutputStreamWriter(out, encoding);
127             writer.write(xmlPrologWriter.toString());
128         }
129         writer.close();
130     }
131 
132     /**
133      * Flushes the underlying writer.
134      *
135      * @throws IOException if an error occurs flushing the underlying writer
136      */
137     @Override
138     public void flush() throws IOException {
139         if (writer != null) {
140             writer.flush();
141         }
142     }
143 
144     /**
145      * Detects the encoding.
146      *
147      * @param cbuf the buffer to write the characters from
148      * @param off The start offset
149      * @param len The number of characters to write
150      * @throws IOException if an error occurs detecting the encoding
151      */
152     private void detectEncoding(final char[] cbuf, final int off, final int len)
153             throws IOException {
154         int size = len;
155         final StringBuffer xmlProlog = xmlPrologWriter.getBuffer();
156         if (xmlProlog.length() + len > BUFFER_SIZE) {
157             size = BUFFER_SIZE - xmlProlog.length();
158         }
159         xmlPrologWriter.write(cbuf, off, size);
160 
161         // try to determine encoding
162         if (xmlProlog.length() >= 5) {
163             if (xmlProlog.substring(0, 5).equals("<?xml")) {
164                 // try to extract encoding from XML prolog
165                 final int xmlPrologEnd = xmlProlog.indexOf("?>");
166                 if (xmlPrologEnd > 0) {
167                     // ok, full XML prolog written: let's extract encoding
168                     final Matcher m = ENCODING_PATTERN.matcher(xmlProlog.substring(0,
169                             xmlPrologEnd));
170                     if (m.find()) {
171                         encoding = m.group(1).toUpperCase();
172                         encoding = encoding.substring(1, encoding.length() - 1);
173                     } else {
174                         // no encoding found in XML prolog: using default
175                         // encoding
176                         encoding = defaultEncoding;
177                     }
178                 } else {
179                     if (xmlProlog.length() >= BUFFER_SIZE) {
180                         // no encoding found in first characters: using default
181                         // encoding
182                         encoding = defaultEncoding;
183                     }
184                 }
185             } else {
186                 // no XML prolog: using default encoding
187                 encoding = defaultEncoding;
188             }
189             if (encoding != null) {
190                 // encoding has been chosen: let's do it
191                 xmlPrologWriter = null;
192                 writer = new OutputStreamWriter(out, encoding);
193                 writer.write(xmlProlog.toString());
194                 if (len > size) {
195                     writer.write(cbuf, off + size, len - size);
196                 }
197             }
198         }
199     }
200 
201     /**
202      * Writes the characters to the underlying writer, detecting encoding.
203      *
204      * @param cbuf the buffer to write the characters from
205      * @param off The start offset
206      * @param len The number of characters to write
207      * @throws IOException if an error occurs detecting the encoding
208      */
209     @Override
210     public void write(final char[] cbuf, final int off, final int len) throws IOException {
211         if (xmlPrologWriter != null) {
212             detectEncoding(cbuf, off, len);
213         } else {
214             writer.write(cbuf, off, len);
215         }
216     }
217 
218     static final Pattern ENCODING_PATTERN = XmlStreamReader.ENCODING_PATTERN;
219 }