001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017 package org.apache.commons.io.output;
018
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.StringWriter;
import java.io.Writer;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.io.input.XmlStreamReader;
031
032 /**
033 * Character stream that handles all the necessary Voodo to figure out the
034 * charset encoding of the XML document written to the stream.
035 *
036 * @version $Id: XmlStreamWriter.java 1304052 2012-03-22 20:55:29Z ggregory $
037 * @see XmlStreamReader
038 * @since 2.0
039 */
040 public class XmlStreamWriter extends Writer {
041 private static final int BUFFER_SIZE = 4096;
042
043 private final OutputStream out;
044
045 private final String defaultEncoding;
046
047 private StringWriter xmlPrologWriter = new StringWriter(BUFFER_SIZE);
048
049 private Writer writer;
050
051 private String encoding;
052
053 /**
054 * Construct an new XML stream writer for the specified output stream
055 * with a default encoding of UTF-8.
056 *
057 * @param out The output stream
058 */
059 public XmlStreamWriter(OutputStream out) {
060 this(out, null);
061 }
062
063 /**
064 * Construct an new XML stream writer for the specified output stream
065 * with the specified default encoding.
066 *
067 * @param out The output stream
068 * @param defaultEncoding The default encoding if not encoding could be detected
069 */
070 public XmlStreamWriter(OutputStream out, String defaultEncoding) {
071 this.out = out;
072 this.defaultEncoding = defaultEncoding != null ? defaultEncoding : "UTF-8";
073 }
074
075 /**
076 * Construct an new XML stream writer for the specified file
077 * with a default encoding of UTF-8.
078 *
079 * @param file The file to write to
080 * @throws FileNotFoundException if there is an error creating or
081 * opening the file
082 */
083 public XmlStreamWriter(File file) throws FileNotFoundException {
084 this(file, null);
085 }
086
087 /**
088 * Construct an new XML stream writer for the specified file
089 * with the specified default encoding.
090 *
091 * @param file The file to write to
092 * @param defaultEncoding The default encoding if not encoding could be detected
093 * @throws FileNotFoundException if there is an error creating or
094 * opening the file
095 */
096 public XmlStreamWriter(File file, String defaultEncoding) throws FileNotFoundException {
097 this(new FileOutputStream(file), defaultEncoding);
098 }
099
100 /**
101 * Return the detected encoding.
102 *
103 * @return the detected encoding
104 */
105 public String getEncoding() {
106 return encoding;
107 }
108
109 /**
110 * Return the default encoding.
111 *
112 * @return the default encoding
113 */
114 public String getDefaultEncoding() {
115 return defaultEncoding;
116 }
117
118 /**
119 * Close the underlying writer.
120 *
121 * @throws IOException if an error occurs closing the underlying writer
122 */
123 @Override
124 public void close() throws IOException {
125 if (writer == null) {
126 encoding = defaultEncoding;
127 writer = new OutputStreamWriter(out, encoding);
128 writer.write(xmlPrologWriter.toString());
129 }
130 writer.close();
131 }
132
133 /**
134 * Flush the underlying writer.
135 *
136 * @throws IOException if an error occurs flushing the underlying writer
137 */
138 @Override
139 public void flush() throws IOException {
140 if (writer != null) {
141 writer.flush();
142 }
143 }
144
145 /**
146 * Detect the encoding.
147 *
148 * @param cbuf the buffer to write the characters from
149 * @param off The start offset
150 * @param len The number of characters to write
151 * @throws IOException if an error occurs detecting the encoding
152 */
153 private void detectEncoding(char[] cbuf, int off, int len)
154 throws IOException {
155 int size = len;
156 StringBuffer xmlProlog = xmlPrologWriter.getBuffer();
157 if (xmlProlog.length() + len > BUFFER_SIZE) {
158 size = BUFFER_SIZE - xmlProlog.length();
159 }
160 xmlPrologWriter.write(cbuf, off, size);
161
162 // try to determine encoding
163 if (xmlProlog.length() >= 5) {
164 if (xmlProlog.substring(0, 5).equals("<?xml")) {
165 // try to extract encoding from XML prolog
166 int xmlPrologEnd = xmlProlog.indexOf("?>");
167 if (xmlPrologEnd > 0) {
168 // ok, full XML prolog written: let's extract encoding
169 Matcher m = ENCODING_PATTERN.matcher(xmlProlog.substring(0,
170 xmlPrologEnd));
171 if (m.find()) {
172 encoding = m.group(1).toUpperCase();
173 encoding = encoding.substring(1, encoding.length() - 1);
174 } else {
175 // no encoding found in XML prolog: using default
176 // encoding
177 encoding = defaultEncoding;
178 }
179 } else {
180 if (xmlProlog.length() >= BUFFER_SIZE) {
181 // no encoding found in first characters: using default
182 // encoding
183 encoding = defaultEncoding;
184 }
185 }
186 } else {
187 // no XML prolog: using default encoding
188 encoding = defaultEncoding;
189 }
190 if (encoding != null) {
191 // encoding has been chosen: let's do it
192 xmlPrologWriter = null;
193 writer = new OutputStreamWriter(out, encoding);
194 writer.write(xmlProlog.toString());
195 if (len > size) {
196 writer.write(cbuf, off + size, len - size);
197 }
198 }
199 }
200 }
201
202 /**
203 * Write the characters to the underlying writer, detecing encoding.
204 *
205 * @param cbuf the buffer to write the characters from
206 * @param off The start offset
207 * @param len The number of characters to write
208 * @throws IOException if an error occurs detecting the encoding
209 */
210 @Override
211 public void write(char[] cbuf, int off, int len) throws IOException {
212 if (xmlPrologWriter != null) {
213 detectEncoding(cbuf, off, len);
214 } else {
215 writer.write(cbuf, off, len);
216 }
217 }
218
219 static final Pattern ENCODING_PATTERN = XmlStreamReader.ENCODING_PATTERN;
220 }