001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017 package org.apache.commons.io.output;
018
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.StringWriter;
import java.io.Writer;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.io.input.XmlStreamReader;
031
032 /**
033 * Character stream that handles all the necessary Voodo to figure out the
034 * charset encoding of the XML document written to the stream.
035 *
036 * @author Herve Boutemy
037 * @version $Id: XmlStreamWriter.java 1004737 2010-10-05 17:56:04Z niallp $
038 * @see XmlStreamReader
039 * @since Commons IO 2.0
040 */
041 public class XmlStreamWriter extends Writer {
042 private static final int BUFFER_SIZE = 4096;
043
044 private final OutputStream out;
045
046 private final String defaultEncoding;
047
048 private StringWriter xmlPrologWriter = new StringWriter(BUFFER_SIZE);
049
050 private Writer writer;
051
052 private String encoding;
053
054 /**
055 * Construct an new XML stream writer for the specified output stream
056 * with a default encoding of UTF-8.
057 *
058 * @param out The output stream
059 */
060 public XmlStreamWriter(OutputStream out) {
061 this(out, null);
062 }
063
064 /**
065 * Construct an new XML stream writer for the specified output stream
066 * with the specified default encoding.
067 *
068 * @param out The output stream
069 * @param defaultEncoding The default encoding if not encoding could be detected
070 */
071 public XmlStreamWriter(OutputStream out, String defaultEncoding) {
072 this.out = out;
073 this.defaultEncoding = (defaultEncoding != null ? defaultEncoding : "UTF-8");
074 }
075
076 /**
077 * Construct an new XML stream writer for the specified file
078 * with a default encoding of UTF-8.
079 *
080 * @param file The file to write to
081 * @throws FileNotFoundException if there is an error creating or
082 * opening the file
083 */
084 public XmlStreamWriter(File file) throws FileNotFoundException {
085 this(file, null);
086 }
087
088 /**
089 * Construct an new XML stream writer for the specified file
090 * with the specified default encoding.
091 *
092 * @param file The file to write to
093 * @param defaultEncoding The default encoding if not encoding could be detected
094 * @throws FileNotFoundException if there is an error creating or
095 * opening the file
096 */
097 public XmlStreamWriter(File file, String defaultEncoding) throws FileNotFoundException {
098 this(new FileOutputStream(file), defaultEncoding);
099 }
100
101 /**
102 * Return the detected encoding.
103 *
104 * @return the detected encoding
105 */
106 public String getEncoding() {
107 return encoding;
108 }
109
110 /**
111 * Return the default encoding.
112 *
113 * @return the default encoding
114 */
115 public String getDefaultEncoding() {
116 return defaultEncoding;
117 }
118
119 /**
120 * Close the underlying writer.
121 *
122 * @throws IOException if an error occurs closing the underlying writer
123 */
124 @Override
125 public void close() throws IOException {
126 if (writer == null) {
127 encoding = defaultEncoding;
128 writer = new OutputStreamWriter(out, encoding);
129 writer.write(xmlPrologWriter.toString());
130 }
131 writer.close();
132 }
133
134 /**
135 * Flush the underlying writer.
136 *
137 * @throws IOException if an error occurs flushing the underlying writer
138 */
139 @Override
140 public void flush() throws IOException {
141 if (writer != null) {
142 writer.flush();
143 }
144 }
145
146 /**
147 * Detect the encoding.
148 *
149 * @param cbuf the buffer to write the characters from
150 * @param off The start offset
151 * @param len The number of characters to write
152 * @throws IOException if an error occurs detecting the encoding
153 */
154 private void detectEncoding(char[] cbuf, int off, int len)
155 throws IOException {
156 int size = len;
157 StringBuffer xmlProlog = xmlPrologWriter.getBuffer();
158 if (xmlProlog.length() + len > BUFFER_SIZE) {
159 size = BUFFER_SIZE - xmlProlog.length();
160 }
161 xmlPrologWriter.write(cbuf, off, size);
162
163 // try to determine encoding
164 if (xmlProlog.length() >= 5) {
165 if (xmlProlog.substring(0, 5).equals("<?xml")) {
166 // try to extract encoding from XML prolog
167 int xmlPrologEnd = xmlProlog.indexOf("?>");
168 if (xmlPrologEnd > 0) {
169 // ok, full XML prolog written: let's extract encoding
170 Matcher m = ENCODING_PATTERN.matcher(xmlProlog.substring(0,
171 xmlPrologEnd));
172 if (m.find()) {
173 encoding = m.group(1).toUpperCase();
174 encoding = encoding.substring(1, encoding.length() - 1);
175 } else {
176 // no encoding found in XML prolog: using default
177 // encoding
178 encoding = defaultEncoding;
179 }
180 } else {
181 if (xmlProlog.length() >= BUFFER_SIZE) {
182 // no encoding found in first characters: using default
183 // encoding
184 encoding = defaultEncoding;
185 }
186 }
187 } else {
188 // no XML prolog: using default encoding
189 encoding = defaultEncoding;
190 }
191 if (encoding != null) {
192 // encoding has been chosen: let's do it
193 xmlPrologWriter = null;
194 writer = new OutputStreamWriter(out, encoding);
195 writer.write(xmlProlog.toString());
196 if (len > size) {
197 writer.write(cbuf, off + size, len - size);
198 }
199 }
200 }
201 }
202
203 /**
204 * Write the characters to the underlying writer, detecing encoding.
205 *
206 * @param cbuf the buffer to write the characters from
207 * @param off The start offset
208 * @param len The number of characters to write
209 * @throws IOException if an error occurs detecting the encoding
210 */
211 @Override
212 public void write(char[] cbuf, int off, int len) throws IOException {
213 if (xmlPrologWriter != null) {
214 detectEncoding(cbuf, off, len);
215 } else {
216 writer.write(cbuf, off, len);
217 }
218 }
219
220 static final Pattern ENCODING_PATTERN = XmlStreamReader.ENCODING_PATTERN;
221 }