1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * https://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.io.output;
18
19 import java.io.File;
20 import java.io.FileNotFoundException;
21 import java.io.FileOutputStream;
22 import java.io.IOException;
23 import java.io.OutputStream;
24 import java.io.OutputStreamWriter;
25 import java.io.StringWriter;
26 import java.io.Writer;
27 import java.nio.charset.Charset;
28 import java.nio.charset.StandardCharsets;
29 import java.util.Locale;
30 import java.util.Objects;
31 import java.util.regex.Matcher;
32
33 import org.apache.commons.io.Charsets;
34 import org.apache.commons.io.IOUtils;
35 import org.apache.commons.io.build.AbstractStreamBuilder;
36 import org.apache.commons.io.input.XmlStreamReader;
37
38 /**
39 * Character stream that handles all the necessary work to figure out the charset encoding of the XML document written to the stream.
40 * <p>
41 * To build an instance, use {@link Builder}.
42 * </p>
43 *
44 * @see Builder
45 * @see XmlStreamReader
46 * @since 2.0
47 */
48 public class XmlStreamWriter extends Writer {
49
50 // @formatter:off
51 /**
52 * Builds a new {@link XmlStreamWriter}.
53 *
54 * <p>
55 * For example:
56 * </p>
57 * <pre>{@code
58 * WriterOutputStream w = WriterOutputStream.builder()
59 * .setPath(path)
60 * .setCharset(StandardCharsets.UTF_8)
61 * .get();}
62 * </pre>
63 *
64 * @see #get()
65 * @since 2.12.0
66 */
67 // @formatter:off
68 public static class Builder extends AbstractStreamBuilder<XmlStreamWriter, Builder> {
69
70 /**
71 * Constructs a new builder of {@link XmlStreamWriter}.
72 */
73 public Builder() {
74 setCharsetDefault(StandardCharsets.UTF_8);
75 setCharset(StandardCharsets.UTF_8);
76 }
77
78 /**
79 * Builds a new {@link XmlStreamWriter}.
80 * <p>
81 * You must set an aspect that supports {@link #getOutputStream()} on this builder, otherwise, this method throws an exception.
82 * </p>
83 * <p>
84 * This builder uses the following aspects:
85 * </p>
86 * <ul>
87 * <li>{@link #getOutputStream()}</li>
88 * <li>{@link #getCharset()}</li>
89 * </ul>
90 *
91 * @return a new instance.
92 * @throws IllegalStateException if the {@code origin} is {@code null}.
93 * @throws UnsupportedOperationException if the origin cannot be converted to an {@link OutputStream}.
94 * @throws IOException if an I/O error occurs converting to an {@link OutputStream} using {@link #getOutputStream()}.
95 * @see #getOutputStream()
96 * @see #getUnchecked()
97 */
98 @Override
99 public XmlStreamWriter get() throws IOException {
100 return new XmlStreamWriter(this);
101 }
102
103 }
104
105 private static final int BUFFER_SIZE = IOUtils.DEFAULT_BUFFER_SIZE;
106
107 /**
108 * Constructs a new {@link Builder}.
109 *
110 * @return a new {@link Builder}.
111 * @since 2.12.0
112 */
113 public static Builder builder() {
114 return new Builder();
115 }
116
117 private final OutputStream out;
118
119 private final Charset defaultCharset;
120
121 private StringWriter prologWriter = new StringWriter(BUFFER_SIZE);
122
123 private Writer writer;
124
125 private Charset charset;
126
/**
 * Constructs a new XML stream writer from the output stream and charset configured on the given builder.
 *
 * @param builder the builder providing the output stream and the default charset.
 * @throws IOException if the builder fails while providing its output stream.
 */
@SuppressWarnings("resource") // caller closes.
private XmlStreamWriter(final Builder builder) throws IOException {
    this(builder.getOutputStream(), builder.getCharset());
}
131
/**
 * Constructs a new XML stream writer that writes to the given file,
 * using UTF-8 as the default encoding.
 * <p>
 * Delegates to the two-argument file constructor with a {@code null} encoding name.
 * </p>
 *
 * @param file The file to write to
 * @throws FileNotFoundException if the file cannot be created or opened
 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
 */
@Deprecated
public XmlStreamWriter(final File file) throws FileNotFoundException {
    this(file, (String) null);
}
145
/**
 * Constructs a new XML stream writer for the specified file
 * with the specified default encoding.
 * <p>
 * The encoding name is resolved before the file is opened, so a name that cannot be resolved
 * does not leave an open (and possibly newly created or truncated) file behind.
 * </p>
 *
 * @param file The file to write to
 * @param defaultEncoding The default encoding if no encoding could be detected
 * @throws FileNotFoundException if there is an error creating or
 * opening the file
 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
 */
@Deprecated
@SuppressWarnings("resource") // the opened stream is owned by this writer and closed by close()
public XmlStreamWriter(final File file, final String defaultEncoding) throws FileNotFoundException {
    this(openAfterResolvingCharset(file, defaultEncoding), defaultEncoding);
}

/**
 * Opens the file for writing only after the encoding name has been resolved successfully.
 *
 * @param file The file to open
 * @param defaultEncoding The encoding name to resolve first; may be {@code null}
 * @return a new output stream on the file
 * @throws FileNotFoundException if there is an error creating or opening the file
 */
private static OutputStream openAfterResolvingCharset(final File file, final String defaultEncoding)
        throws FileNotFoundException {
    // Resolve first: if the name cannot be resolved this throws before any file handle exists,
    // instead of after the stream has been opened with nobody left to close it.
    Charsets.toCharset(defaultEncoding, StandardCharsets.UTF_8);
    return new FileOutputStream(file);
}
161
/**
 * Constructs a new XML stream writer that writes to the given output stream,
 * using UTF-8 as the default encoding.
 *
 * @param out The output stream to write the encoded characters to
 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
 */
@Deprecated
public XmlStreamWriter(final OutputStream out) {
    this(out, StandardCharsets.UTF_8);
}
173
/**
 * Constructs a new XML stream writer that writes to the given output stream
 * and falls back to the given charset when no encoding is detected.
 *
 * @param out The output stream to write the encoded characters to
 * @param defaultEncoding The charset to use if no encoding could be detected; must not be {@code null}
 * @throws NullPointerException if {@code defaultEncoding} is {@code null}
 */
private XmlStreamWriter(final OutputStream out, final Charset defaultEncoding) {
    this.defaultCharset = Objects.requireNonNull(defaultEncoding);
    this.out = out;
}
185
/**
 * Constructs a new XML stream writer that writes to the given output stream,
 * with the default encoding given by name.
 * <p>
 * The name is converted with {@code Charsets.toCharset}, passing UTF-8 as the fallback charset.
 * </p>
 *
 * @param out The output stream to write the encoded characters to
 * @param defaultEncoding The name of the encoding to use if no encoding could be detected
 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
 */
@Deprecated
public XmlStreamWriter(final OutputStream out, final String defaultEncoding) {
    this(out, Charsets.toCharset(defaultEncoding, StandardCharsets.UTF_8));
}
198
199 /**
200 * Closes the underlying writer.
201 *
202 * @throws IOException if an error occurs closing the underlying writer
203 */
204 @Override
205 public void close() throws IOException {
206 if (writer == null) {
207 charset = defaultCharset;
208 writer = new OutputStreamWriter(out, charset);
209 writer.write(prologWriter.toString());
210 }
211 writer.close();
212 }
213
214 /**
215 * Detects the encoding.
216 *
217 * @param cbuf the buffer to write the characters from
218 * @param off The start offset
219 * @param len The number of characters to write
220 * @throws IOException if an error occurs detecting the encoding
221 */
222 private void detectEncoding(final char[] cbuf, final int off, final int len)
223 throws IOException {
224 int size = len;
225 final StringBuffer xmlProlog = prologWriter.getBuffer();
226 if (xmlProlog.length() + len > BUFFER_SIZE) {
227 size = BUFFER_SIZE - xmlProlog.length();
228 }
229 prologWriter.write(cbuf, off, size);
230
231 // try to determine encoding
232 if (xmlProlog.length() >= 5) {
233 if (xmlProlog.substring(0, 5).equals("<?xml")) {
234 // try to extract encoding from XML prolog
235 final int xmlPrologEnd = xmlProlog.indexOf("?>");
236 if (xmlPrologEnd > 0) {
237 // ok, full XML prolog written: let's extract encoding
238 final Matcher m = XmlStreamReader.ENCODING_PATTERN.matcher(xmlProlog.substring(0,
239 xmlPrologEnd));
240 if (m.find()) {
241 final String encName = m.group(1).toUpperCase(Locale.ROOT);
242 charset = Charset.forName(encName.substring(1, encName.length() - 1));
243 } else {
244 // no encoding found in XML prolog: using default
245 // encoding
246 charset = defaultCharset;
247 }
248 } else if (xmlProlog.length() >= BUFFER_SIZE) {
249 // no encoding found in first characters: using default
250 // encoding
251 charset = defaultCharset;
252 }
253 } else {
254 // no XML prolog: using default encoding
255 charset = defaultCharset;
256 }
257 if (charset != null) {
258 // encoding has been chosen: let's do it
259 prologWriter = null;
260 writer = new OutputStreamWriter(out, charset);
261 writer.write(xmlProlog.toString());
262 if (len > size) {
263 writer.write(cbuf, off + size, len - size);
264 }
265 }
266 }
267 }
268
269 /**
270 * Flushes the underlying writer.
271 *
272 * @throws IOException if an error occurs flushing the underlying writer
273 */
274 @Override
275 public void flush() throws IOException {
276 if (writer != null) {
277 writer.flush();
278 }
279 }
280
281 /**
282 * Returns the default encoding.
283 *
284 * @return the default encoding
285 */
286 public String getDefaultEncoding() {
287 return defaultCharset.name();
288 }
289
290 /**
291 * Returns the detected encoding.
292 *
293 * @return the detected encoding
294 */
295 public String getEncoding() {
296 return charset.name();
297 }
298
299 /**
300 * Writes the characters to the underlying writer, detecting encoding.
301 *
302 * @param cbuf the buffer to write the characters from
303 * @param off The start offset
304 * @param len The number of characters to write
305 * @throws IOException if an error occurs detecting the encoding
306 */
307 @Override
308 public void write(final char[] cbuf, final int off, final int len) throws IOException {
309 if (prologWriter != null) {
310 detectEncoding(cbuf, off, len);
311 } else {
312 writer.write(cbuf, off, len);
313 }
314 }
315 }