/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
017package org.apache.commons.io.output;
018
019import java.io.File;
020import java.io.FileNotFoundException;
021import java.io.FileOutputStream;
022import java.io.IOException;
023import java.io.OutputStream;
024import java.io.OutputStreamWriter;
025import java.io.StringWriter;
026import java.io.Writer;
027import java.nio.charset.Charset;
028import java.nio.charset.StandardCharsets;
029import java.util.Locale;
030import java.util.Objects;
031import java.util.regex.Matcher;
032
033import org.apache.commons.io.Charsets;
034import org.apache.commons.io.IOUtils;
035import org.apache.commons.io.build.AbstractStreamBuilder;
036import org.apache.commons.io.input.XmlStreamReader;
037
038/**
039 * Character stream that handles all the necessary work to figure out the charset encoding of the XML document written to the stream.
040 * <p>
041 * To build an instance, use {@link Builder}.
042 * </p>
043 *
044 * @see Builder
045 * @see XmlStreamReader
046 * @since 2.0
047 */
048public class XmlStreamWriter extends Writer {
049
050    // @formatter:off
051    /**
052     * Builds a new {@link XmlStreamWriter}.
053     *
054     * <p>
055     * For example:
056     * </p>
057     * <pre>{@code
058     * WriterOutputStream w = WriterOutputStream.builder()
059     *   .setPath(path)
060     *   .setCharset(StandardCharsets.UTF_8)
061     *   .get();}
062     * </pre>
063     *
064     * @see #get()
065     * @since 2.12.0
066     */
067    // @formatter:off
068    public static class Builder extends AbstractStreamBuilder<XmlStreamWriter, Builder> {
069
070        /**
071         * Constructs a new builder of {@link XmlStreamWriter}.
072         */
073        public Builder() {
074            setCharsetDefault(StandardCharsets.UTF_8);
075            setCharset(StandardCharsets.UTF_8);
076        }
077
078        /**
079         * Builds a new {@link XmlStreamWriter}.
080         * <p>
081         * You must set an aspect that supports {@link #getOutputStream()} on this builder, otherwise, this method throws an exception.
082         * </p>
083         * <p>
084         * This builder uses the following aspects:
085         * </p>
086         * <ul>
087         * <li>{@link #getOutputStream()}</li>
088         * <li>{@link #getCharset()}</li>
089         * </ul>
090         *
091         * @return a new instance.
092         * @throws IllegalStateException         if the {@code origin} is {@code null}.
093         * @throws UnsupportedOperationException if the origin cannot be converted to an {@link OutputStream}.
094         * @throws IOException                   if an I/O error occurs converting to an {@link OutputStream} using {@link #getOutputStream()}.
095         * @see #getOutputStream()
096         * @see #getUnchecked()
097         */
098        @Override
099        public XmlStreamWriter get() throws IOException {
100            return new XmlStreamWriter(this);
101        }
102
103    }
104
105    private static final int BUFFER_SIZE = IOUtils.DEFAULT_BUFFER_SIZE;
106
107    /**
108     * Constructs a new {@link Builder}.
109     *
110     * @return a new {@link Builder}.
111     * @since 2.12.0
112     */
113    public static Builder builder() {
114        return new Builder();
115    }
116
117    private final OutputStream out;
118
119    private final Charset defaultCharset;
120
121    private StringWriter prologWriter = new StringWriter(BUFFER_SIZE);
122
123    private Writer writer;
124
125    private Charset charset;
126
127    @SuppressWarnings("resource") // caller closes.
128    private XmlStreamWriter(final Builder builder) throws IOException {
129        this(builder.getOutputStream(), builder.getCharset());
130    }
131
132    /**
133     * Constructs a new XML stream writer for the specified file
134     * with a default encoding of UTF-8.
135     *
136     * @param file The file to write to
137     * @throws FileNotFoundException if there is an error creating or
138     * opening the file
139     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
140     */
141    @Deprecated
142    public XmlStreamWriter(final File file) throws FileNotFoundException {
143        this(file, null);
144    }
145
146    /**
147     * Constructs a new XML stream writer for the specified file
148     * with the specified default encoding.
149     *
150     * @param file The file to write to
151     * @param defaultEncoding The default encoding if not encoding could be detected
152     * @throws FileNotFoundException if there is an error creating or
153     * opening the file
154     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
155     */
156    @Deprecated
157    @SuppressWarnings("resource")
158    public XmlStreamWriter(final File file, final String defaultEncoding) throws FileNotFoundException {
159        this(new FileOutputStream(file), defaultEncoding);
160    }
161
162    /**
163     * Constructs a new XML stream writer for the specified output stream
164     * with a default encoding of UTF-8.
165     *
166     * @param out The output stream
167     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
168     */
169    @Deprecated
170    public XmlStreamWriter(final OutputStream out) {
171        this(out, StandardCharsets.UTF_8);
172    }
173
174    /**
175     * Constructs a new XML stream writer for the specified output stream
176     * with the specified default encoding.
177     *
178     * @param out The output stream
179     * @param defaultEncoding The default encoding if not encoding could be detected
180     */
181    private XmlStreamWriter(final OutputStream out, final Charset defaultEncoding) {
182        this.out = out;
183        this.defaultCharset = Objects.requireNonNull(defaultEncoding);
184    }
185
186    /**
187     * Constructs a new XML stream writer for the specified output stream
188     * with the specified default encoding.
189     *
190     * @param out The output stream
191     * @param defaultEncoding The default encoding if not encoding could be detected
192     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
193     */
194    @Deprecated
195    public XmlStreamWriter(final OutputStream out, final String defaultEncoding) {
196        this(out, Charsets.toCharset(defaultEncoding, StandardCharsets.UTF_8));
197    }
198
199    /**
200     * Closes the underlying writer.
201     *
202     * @throws IOException if an error occurs closing the underlying writer
203     */
204    @Override
205    public void close() throws IOException {
206        if (writer == null) {
207            charset = defaultCharset;
208            writer = new OutputStreamWriter(out, charset);
209            writer.write(prologWriter.toString());
210        }
211        writer.close();
212    }
213
214    /**
215     * Detects the encoding.
216     *
217     * @param cbuf the buffer to write the characters from
218     * @param off The start offset
219     * @param len The number of characters to write
220     * @throws IOException if an error occurs detecting the encoding
221     */
222    private void detectEncoding(final char[] cbuf, final int off, final int len)
223            throws IOException {
224        int size = len;
225        final StringBuffer xmlProlog = prologWriter.getBuffer();
226        if (xmlProlog.length() + len > BUFFER_SIZE) {
227            size = BUFFER_SIZE - xmlProlog.length();
228        }
229        prologWriter.write(cbuf, off, size);
230
231        // try to determine encoding
232        if (xmlProlog.length() >= 5) {
233            if (xmlProlog.substring(0, 5).equals("<?xml")) {
234                // try to extract encoding from XML prolog
235                final int xmlPrologEnd = xmlProlog.indexOf("?>");
236                if (xmlPrologEnd > 0) {
237                    // ok, full XML prolog written: let's extract encoding
238                    final Matcher m = XmlStreamReader.ENCODING_PATTERN.matcher(xmlProlog.substring(0,
239                            xmlPrologEnd));
240                    if (m.find()) {
241                        final String encName = m.group(1).toUpperCase(Locale.ROOT);
242                        charset = Charset.forName(encName.substring(1, encName.length() - 1));
243                    } else {
244                        // no encoding found in XML prolog: using default
245                        // encoding
246                        charset = defaultCharset;
247                    }
248                } else if (xmlProlog.length() >= BUFFER_SIZE) {
249                    // no encoding found in first characters: using default
250                    // encoding
251                    charset = defaultCharset;
252                }
253            } else {
254                // no XML prolog: using default encoding
255                charset = defaultCharset;
256            }
257            if (charset != null) {
258                // encoding has been chosen: let's do it
259                prologWriter = null;
260                writer = new OutputStreamWriter(out, charset);
261                writer.write(xmlProlog.toString());
262                if (len > size) {
263                    writer.write(cbuf, off + size, len - size);
264                }
265            }
266        }
267    }
268
269    /**
270     * Flushes the underlying writer.
271     *
272     * @throws IOException if an error occurs flushing the underlying writer
273     */
274    @Override
275    public void flush() throws IOException {
276        if (writer != null) {
277            writer.flush();
278        }
279    }
280
281    /**
282     * Returns the default encoding.
283     *
284     * @return the default encoding
285     */
286    public String getDefaultEncoding() {
287        return defaultCharset.name();
288    }
289
290    /**
291     * Returns the detected encoding.
292     *
293     * @return the detected encoding
294     */
295    public String getEncoding() {
296        return charset.name();
297    }
298
299    /**
300     * Writes the characters to the underlying writer, detecting encoding.
301     *
302     * @param cbuf the buffer to write the characters from
303     * @param off The start offset
304     * @param len The number of characters to write
305     * @throws IOException if an error occurs detecting the encoding
306     */
307    @Override
308    public void write(final char[] cbuf, final int off, final int len) throws IOException {
309        if (prologWriter != null) {
310            detectEncoding(cbuf, off, len);
311        } else {
312            writer.write(cbuf, off, len);
313        }
314    }
315}