001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.commons.io.output;
018    
019    import java.io.File;
020    import java.io.FileNotFoundException;
021    import java.io.FileOutputStream;
022    import java.io.IOException;
023    import java.io.OutputStream;
024    import java.io.OutputStreamWriter;
025    import java.io.StringWriter;
026    import java.io.Writer;
027    import java.util.regex.Matcher;
028    import java.util.regex.Pattern;
029    
030    import org.apache.commons.io.input.XmlStreamReader;
031    
032    /**
033     * Character stream that handles all the necessary Voodo to figure out the
034     * charset encoding of the XML document written to the stream.
035     *
036     * @author Herve Boutemy
037     * @version $Id: XmlStreamWriter.java 1004737 2010-10-05 17:56:04Z niallp $
038     * @see XmlStreamReader
039     * @since Commons IO 2.0
040     */
041    public class XmlStreamWriter extends Writer {
042        private static final int BUFFER_SIZE = 4096;
043    
044        private final OutputStream out;
045    
046        private final String defaultEncoding;
047    
048        private StringWriter xmlPrologWriter = new StringWriter(BUFFER_SIZE);
049    
050        private Writer writer;
051    
052        private String encoding;
053    
054        /**
055         * Construct an new XML stream writer for the specified output stream
056         * with a default encoding of UTF-8.
057         *
058         * @param out The output stream
059         */
060        public XmlStreamWriter(OutputStream out) {
061            this(out, null);
062        }
063    
064        /**
065         * Construct an new XML stream writer for the specified output stream
066         * with the specified default encoding.
067         *
068         * @param out The output stream
069         * @param defaultEncoding The default encoding if not encoding could be detected
070         */
071        public XmlStreamWriter(OutputStream out, String defaultEncoding) {
072            this.out = out;
073            this.defaultEncoding = (defaultEncoding != null ? defaultEncoding : "UTF-8");
074        }
075    
076        /**
077         * Construct an new XML stream writer for the specified file
078         * with a default encoding of UTF-8.
079         * 
080         * @param file The file to write to
081         * @throws FileNotFoundException if there is an error creating or
082         * opening the file
083         */
084        public XmlStreamWriter(File file) throws FileNotFoundException {
085            this(file, null);
086        }
087    
088        /**
089         * Construct an new XML stream writer for the specified file
090         * with the specified default encoding.
091         * 
092         * @param file The file to write to
093         * @param defaultEncoding The default encoding if not encoding could be detected
094         * @throws FileNotFoundException if there is an error creating or
095         * opening the file
096         */
097        public XmlStreamWriter(File file, String defaultEncoding) throws FileNotFoundException {
098            this(new FileOutputStream(file), defaultEncoding);
099        }
100    
101        /**
102         * Return the detected encoding.
103         *
104         * @return the detected encoding
105         */
106        public String getEncoding() {
107            return encoding;
108        }
109    
110        /**
111         * Return the default encoding.
112         *
113         * @return the default encoding
114         */
115        public String getDefaultEncoding() {
116            return defaultEncoding;
117        }
118    
119        /**
120         * Close the underlying writer.
121         *
122         * @throws IOException if an error occurs closing the underlying writer
123         */
124        @Override
125        public void close() throws IOException {
126            if (writer == null) {
127                encoding = defaultEncoding;
128                writer = new OutputStreamWriter(out, encoding);
129                writer.write(xmlPrologWriter.toString());
130            }
131            writer.close();
132        }
133    
134        /**
135         * Flush the underlying writer.
136         *
137         * @throws IOException if an error occurs flushing the underlying writer
138         */
139        @Override
140        public void flush() throws IOException {
141            if (writer != null) {
142                writer.flush();
143            }
144        }
145    
146        /**
147         * Detect the encoding.
148         *
149         * @param cbuf the buffer to write the characters from
150         * @param off The start offset
151         * @param len The number of characters to write
152         * @throws IOException if an error occurs detecting the encoding
153         */
154        private void detectEncoding(char[] cbuf, int off, int len)
155                throws IOException {
156            int size = len;
157            StringBuffer xmlProlog = xmlPrologWriter.getBuffer();
158            if (xmlProlog.length() + len > BUFFER_SIZE) {
159                size = BUFFER_SIZE - xmlProlog.length();
160            }
161            xmlPrologWriter.write(cbuf, off, size);
162    
163            // try to determine encoding
164            if (xmlProlog.length() >= 5) {
165                if (xmlProlog.substring(0, 5).equals("<?xml")) {
166                    // try to extract encoding from XML prolog
167                    int xmlPrologEnd = xmlProlog.indexOf("?>");
168                    if (xmlPrologEnd > 0) {
169                        // ok, full XML prolog written: let's extract encoding
170                        Matcher m = ENCODING_PATTERN.matcher(xmlProlog.substring(0,
171                                xmlPrologEnd));
172                        if (m.find()) {
173                            encoding = m.group(1).toUpperCase();
174                            encoding = encoding.substring(1, encoding.length() - 1);
175                        } else {
176                            // no encoding found in XML prolog: using default
177                            // encoding
178                            encoding = defaultEncoding;
179                        }
180                    } else {
181                        if (xmlProlog.length() >= BUFFER_SIZE) {
182                            // no encoding found in first characters: using default
183                            // encoding
184                            encoding = defaultEncoding;
185                        }
186                    }
187                } else {
188                    // no XML prolog: using default encoding
189                    encoding = defaultEncoding;
190                }
191                if (encoding != null) {
192                    // encoding has been chosen: let's do it
193                    xmlPrologWriter = null;
194                    writer = new OutputStreamWriter(out, encoding);
195                    writer.write(xmlProlog.toString());
196                    if (len > size) {
197                        writer.write(cbuf, off + size, len - size);
198                    }
199                }
200            }
201        }
202    
203        /**
204         * Write the characters to the underlying writer, detecing encoding.
205         * 
206         * @param cbuf the buffer to write the characters from
207         * @param off The start offset
208         * @param len The number of characters to write
209         * @throws IOException if an error occurs detecting the encoding
210         */
211        @Override
212        public void write(char[] cbuf, int off, int len) throws IOException {
213            if (xmlPrologWriter != null) {
214                detectEncoding(cbuf, off, len);
215            } else {
216                writer.write(cbuf, off, len);
217            }
218        }
219    
220        static final Pattern ENCODING_PATTERN = XmlStreamReader.ENCODING_PATTERN;
221    }