/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.io.output;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.StringWriter;
import java.io.Writer;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.io.IOUtils;
import org.apache.commons.io.input.XmlStreamReader;

/**
 * Character stream that handles all the necessary Voodoo to figure out the
 * charset encoding of the XML document written to the stream.
 * <p>
 * The first characters written are held back in an in-memory buffer until
 * either the end of the XML declaration ({@code ?>}) has been seen, the data
 * turns out not to start with {@code <?xml}, or {@code BUFFER_SIZE} characters
 * have been buffered. At that point the encoding is chosen (the one named in
 * the declaration, otherwise the default encoding), the real
 * {@link OutputStreamWriter} is created and the buffered characters are
 * forwarded to it. If the stream is closed before that happens, the buffered
 * characters are written using the default encoding.
 * </p>
 *
 * @see XmlStreamReader
 * @since 2.0
 */
public class XmlStreamWriter extends Writer {

    /** Maximum number of characters buffered while looking for the XML declaration. */
    private static final int BUFFER_SIZE = IOUtils.DEFAULT_BUFFER_SIZE;

    /** The byte stream that ultimately receives the encoded document. */
    private final OutputStream out;

    /** Encoding used when the document does not declare one; never {@code null}. */
    private final String defaultEncoding;

    /**
     * Holds the characters written so far while the encoding is still
     * undetermined; set to {@code null} once {@link #writer} has been created
     * by {@link #detectEncoding(char[], int, int)}.
     */
    private StringWriter xmlPrologWriter = new StringWriter(BUFFER_SIZE);

    /** The encoding writer; {@code null} until the encoding has been chosen. */
    private Writer writer;

    /** The chosen encoding; {@code null} until the encoding has been chosen. */
    private String encoding;

    /**
     * Constructs a new XML stream writer for the specified output stream
     * with a default encoding of UTF-8.
     *
     * @param out The output stream
     */
    public XmlStreamWriter(final OutputStream out) {
        this(out, null);
    }

    /**
     * Constructs a new XML stream writer for the specified output stream
     * with the specified default encoding.
     *
     * @param out The output stream
     * @param defaultEncoding The default encoding if no encoding could be
     *        detected; {@code null} means UTF-8
     */
    public XmlStreamWriter(final OutputStream out, final String defaultEncoding) {
        this.out = out;
        this.defaultEncoding = defaultEncoding != null ? defaultEncoding : "UTF-8";
    }

    /**
     * Constructs a new XML stream writer for the specified file
     * with a default encoding of UTF-8.
     *
     * @param file The file to write to
     * @throws FileNotFoundException if there is an error creating or
     *         opening the file
     */
    public XmlStreamWriter(final File file) throws FileNotFoundException {
        this(file, null);
    }

    /**
     * Constructs a new XML stream writer for the specified file
     * with the specified default encoding.
     *
     * @param file The file to write to
     * @param defaultEncoding The default encoding if no encoding could be
     *        detected; {@code null} means UTF-8
     * @throws FileNotFoundException if there is an error creating or
     *         opening the file
     */
    @SuppressWarnings("resource")
    public XmlStreamWriter(final File file, final String defaultEncoding) throws FileNotFoundException {
        this(new FileOutputStream(file), defaultEncoding);
    }

    /**
     * Returns the detected encoding.
     *
     * @return the detected encoding, or {@code null} if no encoding has been
     *         chosen yet
     */
    public String getEncoding() {
        return encoding;
    }

    /**
     * Returns the default encoding.
     *
     * @return the default encoding
     */
    public String getDefaultEncoding() {
        return defaultEncoding;
    }

    /**
     * Closes the underlying writer.
     * <p>
     * If no encoding has been chosen yet, the default encoding is used and
     * the characters buffered so far are written before closing.
     * </p>
     *
     * @throws IOException if an error occurs closing the underlying writer
     */
    @Override
    public void close() throws IOException {
        if (writer == null) {
            // Encoding never got decided: fall back to the default and emit
            // whatever was buffered while waiting for the XML declaration.
            encoding = defaultEncoding;
            writer = new OutputStreamWriter(out, encoding);
            writer.write(xmlPrologWriter.toString());
        }
        writer.close();
    }

    /**
     * Flushes the underlying writer.
     * <p>
     * Does nothing while the encoding is still undetermined, because no
     * underlying writer exists yet.
     * </p>
     *
     * @throws IOException if an error occurs flushing the underlying writer
     */
    @Override
    public void flush() throws IOException {
        if (writer != null) {
            writer.flush();
        }
    }

    /**
     * Buffers the given characters and tries to determine the encoding from
     * what has been buffered so far. Once the encoding is known, the
     * underlying writer is created and receives the buffered characters plus
     * any characters of this call that did not fit in the buffer.
     * <p>
     * The state of this object is only switched from "detecting" to "writing"
     * after the underlying writer has been created successfully. If the
     * chosen encoding is not supported, the exception propagates and this
     * object stays in the detecting state, so {@link #close()} can still
     * release the output stream.
     * </p>
     *
     * @param cbuf the buffer to write the characters from
     * @param off The start offset
     * @param len The number of characters to write
     * @throws IOException if an error occurs detecting the encoding
     */
    private void detectEncoding(final char[] cbuf, final int off, final int len)
            throws IOException {
        final StringBuffer xmlProlog = xmlPrologWriter.getBuffer();

        // Never buffer more than BUFFER_SIZE characters in total; the part of
        // this call that does not fit is written directly further down.
        int size = len;
        if (xmlProlog.length() + len > BUFFER_SIZE) {
            size = BUFFER_SIZE - xmlProlog.length();
        }
        xmlPrologWriter.write(cbuf, off, size);

        if (xmlProlog.length() < 5) {
            // not enough characters yet to tell whether this starts with "<?xml"
            return;
        }

        String detected = null;
        if (xmlProlog.substring(0, 5).equals("<?xml")) {
            // try to extract encoding from XML prolog
            final int xmlPrologEnd = xmlProlog.indexOf("?>");
            if (xmlPrologEnd > 0) {
                // ok, full XML prolog written: let's extract encoding
                final Matcher m = ENCODING_PATTERN.matcher(xmlProlog.substring(0, xmlPrologEnd));
                if (m.find()) {
                    // first and last character of the group are dropped
                    // (presumably the quotes around the encoding name)
                    final String quoted = m.group(1).toUpperCase(Locale.ROOT);
                    detected = quoted.substring(1, quoted.length() - 1);
                } else {
                    // no encoding found in XML prolog: using default encoding
                    detected = defaultEncoding;
                }
            } else if (xmlProlog.length() >= BUFFER_SIZE) {
                // no encoding found in first characters: using default encoding
                detected = defaultEncoding;
            }
            // otherwise: prolog still incomplete, keep buffering
        } else {
            // no XML prolog: using default encoding
            detected = defaultEncoding;
        }

        if (detected != null) {
            // Create the writer BEFORE touching any field: the constructor
            // throws if the encoding is unsupported, and in that case the
            // prolog buffer must survive so later calls do not hit a null.
            final Writer target = new OutputStreamWriter(out, detected);
            encoding = detected;
            writer = target;
            xmlPrologWriter = null;

            writer.write(xmlProlog.toString());
            if (len > size) {
                writer.write(cbuf, off + size, len - size);
            }
        }
    }

    /**
     * Writes the characters to the underlying writer, detecting encoding.
     *
     * @param cbuf the buffer to write the characters from
     * @param off The start offset
     * @param len The number of characters to write
     * @throws IOException if an error occurs detecting the encoding
     */
    @Override
    public void write(final char[] cbuf, final int off, final int len) throws IOException {
        if (xmlPrologWriter != null) {
            // still buffering: encoding not chosen yet
            detectEncoding(cbuf, off, len);
        } else {
            writer.write(cbuf, off, len);
        }
    }

    /** Pattern used to locate the encoding in the XML declaration; shared with {@link XmlStreamReader}. */
    static final Pattern ENCODING_PATTERN = XmlStreamReader.ENCODING_PATTERN;
}