001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.commons.io.output;
018    
019    import java.io.File;
020    import java.io.FileNotFoundException;
021    import java.io.FileOutputStream;
022    import java.io.IOException;
023    import java.io.OutputStream;
024    import java.io.OutputStreamWriter;
025    import java.io.StringWriter;
026    import java.io.Writer;
027    import java.util.regex.Matcher;
028    import java.util.regex.Pattern;
029    
030    import org.apache.commons.io.input.XmlStreamReader;
031    
032    /**
033     * Character stream that handles all the necessary Voodo to figure out the
034     * charset encoding of the XML document written to the stream.
035     *
036     * @version $Id: XmlStreamWriter.java 1304052 2012-03-22 20:55:29Z ggregory $
037     * @see XmlStreamReader
038     * @since 2.0
039     */
040    public class XmlStreamWriter extends Writer {
041        private static final int BUFFER_SIZE = 4096;
042    
043        private final OutputStream out;
044    
045        private final String defaultEncoding;
046    
047        private StringWriter xmlPrologWriter = new StringWriter(BUFFER_SIZE);
048    
049        private Writer writer;
050    
051        private String encoding;
052    
053        /**
054         * Construct an new XML stream writer for the specified output stream
055         * with a default encoding of UTF-8.
056         *
057         * @param out The output stream
058         */
059        public XmlStreamWriter(OutputStream out) {
060            this(out, null);
061        }
062    
063        /**
064         * Construct an new XML stream writer for the specified output stream
065         * with the specified default encoding.
066         *
067         * @param out The output stream
068         * @param defaultEncoding The default encoding if not encoding could be detected
069         */
070        public XmlStreamWriter(OutputStream out, String defaultEncoding) {
071            this.out = out;
072            this.defaultEncoding = defaultEncoding != null ? defaultEncoding : "UTF-8";
073        }
074    
075        /**
076         * Construct an new XML stream writer for the specified file
077         * with a default encoding of UTF-8.
078         * 
079         * @param file The file to write to
080         * @throws FileNotFoundException if there is an error creating or
081         * opening the file
082         */
083        public XmlStreamWriter(File file) throws FileNotFoundException {
084            this(file, null);
085        }
086    
087        /**
088         * Construct an new XML stream writer for the specified file
089         * with the specified default encoding.
090         * 
091         * @param file The file to write to
092         * @param defaultEncoding The default encoding if not encoding could be detected
093         * @throws FileNotFoundException if there is an error creating or
094         * opening the file
095         */
096        public XmlStreamWriter(File file, String defaultEncoding) throws FileNotFoundException {
097            this(new FileOutputStream(file), defaultEncoding);
098        }
099    
100        /**
101         * Return the detected encoding.
102         *
103         * @return the detected encoding
104         */
105        public String getEncoding() {
106            return encoding;
107        }
108    
109        /**
110         * Return the default encoding.
111         *
112         * @return the default encoding
113         */
114        public String getDefaultEncoding() {
115            return defaultEncoding;
116        }
117    
118        /**
119         * Close the underlying writer.
120         *
121         * @throws IOException if an error occurs closing the underlying writer
122         */
123        @Override
124        public void close() throws IOException {
125            if (writer == null) {
126                encoding = defaultEncoding;
127                writer = new OutputStreamWriter(out, encoding);
128                writer.write(xmlPrologWriter.toString());
129            }
130            writer.close();
131        }
132    
133        /**
134         * Flush the underlying writer.
135         *
136         * @throws IOException if an error occurs flushing the underlying writer
137         */
138        @Override
139        public void flush() throws IOException {
140            if (writer != null) {
141                writer.flush();
142            }
143        }
144    
145        /**
146         * Detect the encoding.
147         *
148         * @param cbuf the buffer to write the characters from
149         * @param off The start offset
150         * @param len The number of characters to write
151         * @throws IOException if an error occurs detecting the encoding
152         */
153        private void detectEncoding(char[] cbuf, int off, int len)
154                throws IOException {
155            int size = len;
156            StringBuffer xmlProlog = xmlPrologWriter.getBuffer();
157            if (xmlProlog.length() + len > BUFFER_SIZE) {
158                size = BUFFER_SIZE - xmlProlog.length();
159            }
160            xmlPrologWriter.write(cbuf, off, size);
161    
162            // try to determine encoding
163            if (xmlProlog.length() >= 5) {
164                if (xmlProlog.substring(0, 5).equals("<?xml")) {
165                    // try to extract encoding from XML prolog
166                    int xmlPrologEnd = xmlProlog.indexOf("?>");
167                    if (xmlPrologEnd > 0) {
168                        // ok, full XML prolog written: let's extract encoding
169                        Matcher m = ENCODING_PATTERN.matcher(xmlProlog.substring(0,
170                                xmlPrologEnd));
171                        if (m.find()) {
172                            encoding = m.group(1).toUpperCase();
173                            encoding = encoding.substring(1, encoding.length() - 1);
174                        } else {
175                            // no encoding found in XML prolog: using default
176                            // encoding
177                            encoding = defaultEncoding;
178                        }
179                    } else {
180                        if (xmlProlog.length() >= BUFFER_SIZE) {
181                            // no encoding found in first characters: using default
182                            // encoding
183                            encoding = defaultEncoding;
184                        }
185                    }
186                } else {
187                    // no XML prolog: using default encoding
188                    encoding = defaultEncoding;
189                }
190                if (encoding != null) {
191                    // encoding has been chosen: let's do it
192                    xmlPrologWriter = null;
193                    writer = new OutputStreamWriter(out, encoding);
194                    writer.write(xmlProlog.toString());
195                    if (len > size) {
196                        writer.write(cbuf, off + size, len - size);
197                    }
198                }
199            }
200        }
201    
202        /**
203         * Write the characters to the underlying writer, detecing encoding.
204         * 
205         * @param cbuf the buffer to write the characters from
206         * @param off The start offset
207         * @param len The number of characters to write
208         * @throws IOException if an error occurs detecting the encoding
209         */
210        @Override
211        public void write(char[] cbuf, int off, int len) throws IOException {
212            if (xmlPrologWriter != null) {
213                detectEncoding(cbuf, off, len);
214            } else {
215                writer.write(cbuf, off, len);
216            }
217        }
218    
219        static final Pattern ENCODING_PATTERN = XmlStreamReader.ENCODING_PATTERN;
220    }