001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.io.output;
018
019import java.io.File;
020import java.io.FileNotFoundException;
021import java.io.FileOutputStream;
022import java.io.IOException;
023import java.io.OutputStream;
024import java.io.OutputStreamWriter;
025import java.io.StringWriter;
026import java.io.Writer;
027import java.util.Locale;
028import java.util.regex.Matcher;
029import java.util.regex.Pattern;
030
031import org.apache.commons.io.IOUtils;
032import org.apache.commons.io.input.XmlStreamReader;
033
034/**
035 * Character stream that handles all the necessary Voodoo to figure out the
036 * charset encoding of the XML document written to the stream.
037 *
038 * @see XmlStreamReader
039 * @since 2.0
040 */
041public class XmlStreamWriter extends Writer {
042    private static final int BUFFER_SIZE = IOUtils.DEFAULT_BUFFER_SIZE;
043
044    private final OutputStream out;
045
046    private final String defaultEncoding;
047
048    private StringWriter xmlPrologWriter = new StringWriter(BUFFER_SIZE);
049
050    private Writer writer;
051
052    private String encoding;
053
054    /**
055     * Constructs a new XML stream writer for the specified output stream
056     * with a default encoding of UTF-8.
057     *
058     * @param out The output stream
059     */
060    public XmlStreamWriter(final OutputStream out) {
061        this(out, null);
062    }
063
064    /**
065     * Constructs a new XML stream writer for the specified output stream
066     * with the specified default encoding.
067     *
068     * @param out The output stream
069     * @param defaultEncoding The default encoding if not encoding could be detected
070     */
071    public XmlStreamWriter(final OutputStream out, final String defaultEncoding) {
072        this.out = out;
073        this.defaultEncoding = defaultEncoding != null ? defaultEncoding : "UTF-8";
074    }
075
076    /**
077     * Constructs a new XML stream writer for the specified file
078     * with a default encoding of UTF-8.
079     *
080     * @param file The file to write to
081     * @throws FileNotFoundException if there is an error creating or
082     * opening the file
083     */
084    public XmlStreamWriter(final File file) throws FileNotFoundException {
085        this(file, null);
086    }
087
088    /**
089     * Constructs a new XML stream writer for the specified file
090     * with the specified default encoding.
091     *
092     * @param file The file to write to
093     * @param defaultEncoding The default encoding if not encoding could be detected
094     * @throws FileNotFoundException if there is an error creating or
095     * opening the file
096     */
097    @SuppressWarnings("resource")
098    public XmlStreamWriter(final File file, final String defaultEncoding) throws FileNotFoundException {
099        this(new FileOutputStream(file), defaultEncoding);
100    }
101
102    /**
103     * Returns the detected encoding.
104     *
105     * @return the detected encoding
106     */
107    public String getEncoding() {
108        return encoding;
109    }
110
111    /**
112     * Returns the default encoding.
113     *
114     * @return the default encoding
115     */
116    public String getDefaultEncoding() {
117        return defaultEncoding;
118    }
119
120    /**
121     * Closes the underlying writer.
122     *
123     * @throws IOException if an error occurs closing the underlying writer
124     */
125    @Override
126    public void close() throws IOException {
127        if (writer == null) {
128            encoding = defaultEncoding;
129            writer = new OutputStreamWriter(out, encoding);
130            writer.write(xmlPrologWriter.toString());
131        }
132        writer.close();
133    }
134
135    /**
136     * Flushes the underlying writer.
137     *
138     * @throws IOException if an error occurs flushing the underlying writer
139     */
140    @Override
141    public void flush() throws IOException {
142        if (writer != null) {
143            writer.flush();
144        }
145    }
146
147    /**
148     * Detects the encoding.
149     *
150     * @param cbuf the buffer to write the characters from
151     * @param off The start offset
152     * @param len The number of characters to write
153     * @throws IOException if an error occurs detecting the encoding
154     */
155    private void detectEncoding(final char[] cbuf, final int off, final int len)
156            throws IOException {
157        int size = len;
158        final StringBuffer xmlProlog = xmlPrologWriter.getBuffer();
159        if (xmlProlog.length() + len > BUFFER_SIZE) {
160            size = BUFFER_SIZE - xmlProlog.length();
161        }
162        xmlPrologWriter.write(cbuf, off, size);
163
164        // try to determine encoding
165        if (xmlProlog.length() >= 5) {
166            if (xmlProlog.substring(0, 5).equals("<?xml")) {
167                // try to extract encoding from XML prolog
168                final int xmlPrologEnd = xmlProlog.indexOf("?>");
169                if (xmlPrologEnd > 0) {
170                    // ok, full XML prolog written: let's extract encoding
171                    final Matcher m = ENCODING_PATTERN.matcher(xmlProlog.substring(0,
172                            xmlPrologEnd));
173                    if (m.find()) {
174                        encoding = m.group(1).toUpperCase(Locale.ROOT);
175                        encoding = encoding.substring(1, encoding.length() - 1);
176                    } else {
177                        // no encoding found in XML prolog: using default
178                        // encoding
179                        encoding = defaultEncoding;
180                    }
181                } else {
182                    if (xmlProlog.length() >= BUFFER_SIZE) {
183                        // no encoding found in first characters: using default
184                        // encoding
185                        encoding = defaultEncoding;
186                    }
187                }
188            } else {
189                // no XML prolog: using default encoding
190                encoding = defaultEncoding;
191            }
192            if (encoding != null) {
193                // encoding has been chosen: let's do it
194                xmlPrologWriter = null;
195                writer = new OutputStreamWriter(out, encoding);
196                writer.write(xmlProlog.toString());
197                if (len > size) {
198                    writer.write(cbuf, off + size, len - size);
199                }
200            }
201        }
202    }
203
204    /**
205     * Writes the characters to the underlying writer, detecting encoding.
206     *
207     * @param cbuf the buffer to write the characters from
208     * @param off The start offset
209     * @param len The number of characters to write
210     * @throws IOException if an error occurs detecting the encoding
211     */
212    @Override
213    public void write(final char[] cbuf, final int off, final int len) throws IOException {
214        if (xmlPrologWriter != null) {
215            detectEncoding(cbuf, off, len);
216        } else {
217            writer.write(cbuf, off, len);
218        }
219    }
220
221    static final Pattern ENCODING_PATTERN = XmlStreamReader.ENCODING_PATTERN;
222}