/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
017package org.apache.commons.io.output;
018
019import java.io.File;
020import java.io.FileNotFoundException;
021import java.io.FileOutputStream;
022import java.io.IOException;
023import java.io.OutputStream;
024import java.io.OutputStreamWriter;
025import java.io.StringWriter;
026import java.io.Writer;
027import java.nio.charset.Charset;
028import java.nio.charset.StandardCharsets;
029import java.util.Locale;
030import java.util.Objects;
031import java.util.regex.Matcher;
032
033import org.apache.commons.io.Charsets;
034import org.apache.commons.io.IOUtils;
035import org.apache.commons.io.build.AbstractStreamBuilder;
036import org.apache.commons.io.input.XmlStreamReader;
037
038/**
039 * Character stream that handles all the necessary work to figure out the charset encoding of the XML document written to the stream.
040 * <p>
041 * To build an instance, use {@link Builder}.
042 * </p>
043 *
044 * @see Builder
045 * @see XmlStreamReader
046 * @since 2.0
047 */
048public class XmlStreamWriter extends Writer {
049
050    // @formatter:off
051    /**
052     * Builds a new {@link XmlStreamWriter}.
053     *
054     * <p>
055     * For example:
056     * </p>
057     * <pre>{@code
058     * WriterOutputStream w = WriterOutputStream.builder()
059     *   .setPath(path)
060     *   .setCharset(StandardCharsets.UTF_8)
061     *   .get();}
062     * </pre>
063     *
064     * @see #get()
065     * @since 2.12.0
066     */
067    // @formatter:off
068    public static class Builder extends AbstractStreamBuilder<XmlStreamWriter, Builder> {
069
070        /**
071         * Constructs a new {@link Builder}.
072         */
073        public Builder() {
074            setCharsetDefault(StandardCharsets.UTF_8);
075            setCharset(StandardCharsets.UTF_8);
076        }
077
078        /**
079         * Builds a new {@link XmlStreamWriter}.
080         * <p>
081         * You must set input that supports {@link #getOutputStream()} on this builder, otherwise, this method throws an exception.
082         * </p>
083         * <p>
084         * This builder use the following aspects:
085         * </p>
086         * <ul>
087         * <li>{@link #getOutputStream()}</li>
088         * <li>{@link #getCharset()}</li>
089         * </ul>
090         *
091         * @return a new instance.
092         * @throws IllegalStateException         if the {@code origin} is {@code null}.
093         * @throws UnsupportedOperationException if the origin cannot be converted to an {@link OutputStream}.
094         * @throws IOException                   if an I/O error occurs.
095         * @see #getOutputStream()
096         */
097        @SuppressWarnings("resource")
098        @Override
099        public XmlStreamWriter get() throws IOException {
100            return new XmlStreamWriter(getOutputStream(), getCharset());
101        }
102
103    }
104
105    private static final int BUFFER_SIZE = IOUtils.DEFAULT_BUFFER_SIZE;
106
107    /**
108     * Constructs a new {@link Builder}.
109     *
110     * @return a new {@link Builder}.
111     * @since 2.12.0
112     */
113    public static Builder builder() {
114        return new Builder();
115    }
116
117    private final OutputStream out;
118
119    private final Charset defaultCharset;
120
121    private StringWriter prologWriter = new StringWriter(BUFFER_SIZE);
122
123    private Writer writer;
124
125    private Charset charset;
126
127    /**
128     * Constructs a new XML stream writer for the specified file
129     * with a default encoding of UTF-8.
130     *
131     * @param file The file to write to
132     * @throws FileNotFoundException if there is an error creating or
133     * opening the file
134     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
135     */
136    @Deprecated
137    public XmlStreamWriter(final File file) throws FileNotFoundException {
138        this(file, null);
139    }
140
141    /**
142     * Constructs a new XML stream writer for the specified file
143     * with the specified default encoding.
144     *
145     * @param file The file to write to
146     * @param defaultEncoding The default encoding if not encoding could be detected
147     * @throws FileNotFoundException if there is an error creating or
148     * opening the file
149     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
150     */
151    @Deprecated
152    @SuppressWarnings("resource")
153    public XmlStreamWriter(final File file, final String defaultEncoding) throws FileNotFoundException {
154        this(new FileOutputStream(file), defaultEncoding);
155    }
156
157    /**
158     * Constructs a new XML stream writer for the specified output stream
159     * with a default encoding of UTF-8.
160     *
161     * @param out The output stream
162     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
163     */
164    @Deprecated
165    public XmlStreamWriter(final OutputStream out) {
166        this(out, StandardCharsets.UTF_8);
167    }
168
169    /**
170     * Constructs a new XML stream writer for the specified output stream
171     * with the specified default encoding.
172     *
173     * @param out The output stream
174     * @param defaultEncoding The default encoding if not encoding could be detected
175     */
176    private XmlStreamWriter(final OutputStream out, final Charset defaultEncoding) {
177        this.out = out;
178        this.defaultCharset = Objects.requireNonNull(defaultEncoding);
179    }
180
181    /**
182     * Constructs a new XML stream writer for the specified output stream
183     * with the specified default encoding.
184     *
185     * @param out The output stream
186     * @param defaultEncoding The default encoding if not encoding could be detected
187     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
188     */
189    @Deprecated
190    public XmlStreamWriter(final OutputStream out, final String defaultEncoding) {
191        this(out, Charsets.toCharset(defaultEncoding, StandardCharsets.UTF_8));
192    }
193
194    /**
195     * Closes the underlying writer.
196     *
197     * @throws IOException if an error occurs closing the underlying writer
198     */
199    @Override
200    public void close() throws IOException {
201        if (writer == null) {
202            charset = defaultCharset;
203            writer = new OutputStreamWriter(out, charset);
204            writer.write(prologWriter.toString());
205        }
206        writer.close();
207    }
208
209    /**
210     * Detects the encoding.
211     *
212     * @param cbuf the buffer to write the characters from
213     * @param off The start offset
214     * @param len The number of characters to write
215     * @throws IOException if an error occurs detecting the encoding
216     */
217    private void detectEncoding(final char[] cbuf, final int off, final int len)
218            throws IOException {
219        int size = len;
220        final StringBuffer xmlProlog = prologWriter.getBuffer();
221        if (xmlProlog.length() + len > BUFFER_SIZE) {
222            size = BUFFER_SIZE - xmlProlog.length();
223        }
224        prologWriter.write(cbuf, off, size);
225
226        // try to determine encoding
227        if (xmlProlog.length() >= 5) {
228            if (xmlProlog.substring(0, 5).equals("<?xml")) {
229                // try to extract encoding from XML prolog
230                final int xmlPrologEnd = xmlProlog.indexOf("?>");
231                if (xmlPrologEnd > 0) {
232                    // ok, full XML prolog written: let's extract encoding
233                    final Matcher m = XmlStreamReader.ENCODING_PATTERN.matcher(xmlProlog.substring(0,
234                            xmlPrologEnd));
235                    if (m.find()) {
236                        final String encName = m.group(1).toUpperCase(Locale.ROOT);
237                        charset = Charset.forName(encName.substring(1, encName.length() - 1));
238                    } else {
239                        // no encoding found in XML prolog: using default
240                        // encoding
241                        charset = defaultCharset;
242                    }
243                } else if (xmlProlog.length() >= BUFFER_SIZE) {
244                    // no encoding found in first characters: using default
245                    // encoding
246                    charset = defaultCharset;
247                }
248            } else {
249                // no XML prolog: using default encoding
250                charset = defaultCharset;
251            }
252            if (charset != null) {
253                // encoding has been chosen: let's do it
254                prologWriter = null;
255                writer = new OutputStreamWriter(out, charset);
256                writer.write(xmlProlog.toString());
257                if (len > size) {
258                    writer.write(cbuf, off + size, len - size);
259                }
260            }
261        }
262    }
263
264    /**
265     * Flushes the underlying writer.
266     *
267     * @throws IOException if an error occurs flushing the underlying writer
268     */
269    @Override
270    public void flush() throws IOException {
271        if (writer != null) {
272            writer.flush();
273        }
274    }
275
276    /**
277     * Returns the default encoding.
278     *
279     * @return the default encoding
280     */
281    public String getDefaultEncoding() {
282        return defaultCharset.name();
283    }
284
285    /**
286     * Returns the detected encoding.
287     *
288     * @return the detected encoding
289     */
290    public String getEncoding() {
291        return charset.name();
292    }
293
294    /**
295     * Writes the characters to the underlying writer, detecting encoding.
296     *
297     * @param cbuf the buffer to write the characters from
298     * @param off The start offset
299     * @param len The number of characters to write
300     * @throws IOException if an error occurs detecting the encoding
301     */
302    @Override
303    public void write(final char[] cbuf, final int off, final int len) throws IOException {
304        if (prologWriter != null) {
305            detectEncoding(cbuf, off, len);
306        } else {
307            writer.write(cbuf, off, len);
308        }
309    }
310}