001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.io.output;
018
019import java.io.File;
020import java.io.FileNotFoundException;
021import java.io.FileOutputStream;
022import java.io.IOException;
023import java.io.OutputStream;
024import java.io.OutputStreamWriter;
025import java.io.StringWriter;
026import java.io.Writer;
027import java.util.regex.Matcher;
028import java.util.regex.Pattern;
029
030import org.apache.commons.io.input.XmlStreamReader;
031
032/**
033 * Character stream that handles all the necessary Voodo to figure out the
034 * charset encoding of the XML document written to the stream.
035 *
036 * @version $Id: XmlStreamWriter.java 1415850 2012-11-30 20:51:39Z ggregory $
037 * @see XmlStreamReader
038 * @since 2.0
039 */
040public class XmlStreamWriter extends Writer {
041    private static final int BUFFER_SIZE = 4096;
042
043    private final OutputStream out;
044
045    private final String defaultEncoding;
046
047    private StringWriter xmlPrologWriter = new StringWriter(BUFFER_SIZE);
048
049    private Writer writer;
050
051    private String encoding;
052
053    /**
054     * Construct an new XML stream writer for the specified output stream
055     * with a default encoding of UTF-8.
056     *
057     * @param out The output stream
058     */
059    public XmlStreamWriter(final OutputStream out) {
060        this(out, null);
061    }
062
063    /**
064     * Construct an new XML stream writer for the specified output stream
065     * with the specified default encoding.
066     *
067     * @param out The output stream
068     * @param defaultEncoding The default encoding if not encoding could be detected
069     */
070    public XmlStreamWriter(final OutputStream out, final String defaultEncoding) {
071        this.out = out;
072        this.defaultEncoding = defaultEncoding != null ? defaultEncoding : "UTF-8";
073    }
074
075    /**
076     * Construct an new XML stream writer for the specified file
077     * with a default encoding of UTF-8.
078     * 
079     * @param file The file to write to
080     * @throws FileNotFoundException if there is an error creating or
081     * opening the file
082     */
083    public XmlStreamWriter(final File file) throws FileNotFoundException {
084        this(file, null);
085    }
086
087    /**
088     * Construct an new XML stream writer for the specified file
089     * with the specified default encoding.
090     * 
091     * @param file The file to write to
092     * @param defaultEncoding The default encoding if not encoding could be detected
093     * @throws FileNotFoundException if there is an error creating or
094     * opening the file
095     */
096    public XmlStreamWriter(final File file, final String defaultEncoding) throws FileNotFoundException {
097        this(new FileOutputStream(file), defaultEncoding);
098    }
099
100    /**
101     * Return the detected encoding.
102     *
103     * @return the detected encoding
104     */
105    public String getEncoding() {
106        return encoding;
107    }
108
109    /**
110     * Return the default encoding.
111     *
112     * @return the default encoding
113     */
114    public String getDefaultEncoding() {
115        return defaultEncoding;
116    }
117
118    /**
119     * Close the underlying writer.
120     *
121     * @throws IOException if an error occurs closing the underlying writer
122     */
123    @Override
124    public void close() throws IOException {
125        if (writer == null) {
126            encoding = defaultEncoding;
127            writer = new OutputStreamWriter(out, encoding);
128            writer.write(xmlPrologWriter.toString());
129        }
130        writer.close();
131    }
132
133    /**
134     * Flush the underlying writer.
135     *
136     * @throws IOException if an error occurs flushing the underlying writer
137     */
138    @Override
139    public void flush() throws IOException {
140        if (writer != null) {
141            writer.flush();
142        }
143    }
144
145    /**
146     * Detect the encoding.
147     *
148     * @param cbuf the buffer to write the characters from
149     * @param off The start offset
150     * @param len The number of characters to write
151     * @throws IOException if an error occurs detecting the encoding
152     */
153    private void detectEncoding(final char[] cbuf, final int off, final int len)
154            throws IOException {
155        int size = len;
156        final StringBuffer xmlProlog = xmlPrologWriter.getBuffer();
157        if (xmlProlog.length() + len > BUFFER_SIZE) {
158            size = BUFFER_SIZE - xmlProlog.length();
159        }
160        xmlPrologWriter.write(cbuf, off, size);
161
162        // try to determine encoding
163        if (xmlProlog.length() >= 5) {
164            if (xmlProlog.substring(0, 5).equals("<?xml")) {
165                // try to extract encoding from XML prolog
166                final int xmlPrologEnd = xmlProlog.indexOf("?>");
167                if (xmlPrologEnd > 0) {
168                    // ok, full XML prolog written: let's extract encoding
169                    final Matcher m = ENCODING_PATTERN.matcher(xmlProlog.substring(0,
170                            xmlPrologEnd));
171                    if (m.find()) {
172                        encoding = m.group(1).toUpperCase();
173                        encoding = encoding.substring(1, encoding.length() - 1);
174                    } else {
175                        // no encoding found in XML prolog: using default
176                        // encoding
177                        encoding = defaultEncoding;
178                    }
179                } else {
180                    if (xmlProlog.length() >= BUFFER_SIZE) {
181                        // no encoding found in first characters: using default
182                        // encoding
183                        encoding = defaultEncoding;
184                    }
185                }
186            } else {
187                // no XML prolog: using default encoding
188                encoding = defaultEncoding;
189            }
190            if (encoding != null) {
191                // encoding has been chosen: let's do it
192                xmlPrologWriter = null;
193                writer = new OutputStreamWriter(out, encoding);
194                writer.write(xmlProlog.toString());
195                if (len > size) {
196                    writer.write(cbuf, off + size, len - size);
197                }
198            }
199        }
200    }
201
202    /**
203     * Write the characters to the underlying writer, detecing encoding.
204     * 
205     * @param cbuf the buffer to write the characters from
206     * @param off The start offset
207     * @param len The number of characters to write
208     * @throws IOException if an error occurs detecting the encoding
209     */
210    @Override
211    public void write(final char[] cbuf, final int off, final int len) throws IOException {
212        if (xmlPrologWriter != null) {
213            detectEncoding(cbuf, off, len);
214        } else {
215            writer.write(cbuf, off, len);
216        }
217    }
218
219    static final Pattern ENCODING_PATTERN = XmlStreamReader.ENCODING_PATTERN;
220}