001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 package org.apache.commons.io.output; 018 019 import java.io.File; 020 import java.io.FileNotFoundException; 021 import java.io.FileOutputStream; 022 import java.io.IOException; 023 import java.io.OutputStream; 024 import java.io.OutputStreamWriter; 025 import java.io.StringWriter; 026 import java.io.Writer; 027 import java.util.regex.Matcher; 028 import java.util.regex.Pattern; 029 030 import org.apache.commons.io.input.XmlStreamReader; 031 032 /** 033 * Character stream that handles all the necessary Voodo to figure out the 034 * charset encoding of the XML document written to the stream. 035 * 036 * @author Herve Boutemy 037 * @version $Id: XmlStreamWriter.java 1004737 2010-10-05 17:56:04Z niallp $ 038 * @see XmlStreamReader 039 * @since Commons IO 2.0 040 */ 041 public class XmlStreamWriter extends Writer { 042 private static final int BUFFER_SIZE = 4096; 043 044 private final OutputStream out; 045 046 private final String defaultEncoding; 047 048 private StringWriter xmlPrologWriter = new StringWriter(BUFFER_SIZE); 049 050 private Writer writer; 051 052 private String encoding; 053 054 /** 055 * Construct an new XML stream writer for the specified output stream 056 * with a default encoding of UTF-8. 057 * 058 * @param out The output stream 059 */ 060 public XmlStreamWriter(OutputStream out) { 061 this(out, null); 062 } 063 064 /** 065 * Construct an new XML stream writer for the specified output stream 066 * with the specified default encoding. 067 * 068 * @param out The output stream 069 * @param defaultEncoding The default encoding if not encoding could be detected 070 */ 071 public XmlStreamWriter(OutputStream out, String defaultEncoding) { 072 this.out = out; 073 this.defaultEncoding = (defaultEncoding != null ? defaultEncoding : "UTF-8"); 074 } 075 076 /** 077 * Construct an new XML stream writer for the specified file 078 * with a default encoding of UTF-8. 079 * 080 * @param file The file to write to 081 * @throws FileNotFoundException if there is an error creating or 082 * opening the file 083 */ 084 public XmlStreamWriter(File file) throws FileNotFoundException { 085 this(file, null); 086 } 087 088 /** 089 * Construct an new XML stream writer for the specified file 090 * with the specified default encoding. 091 * 092 * @param file The file to write to 093 * @param defaultEncoding The default encoding if not encoding could be detected 094 * @throws FileNotFoundException if there is an error creating or 095 * opening the file 096 */ 097 public XmlStreamWriter(File file, String defaultEncoding) throws FileNotFoundException { 098 this(new FileOutputStream(file), defaultEncoding); 099 } 100 101 /** 102 * Return the detected encoding. 103 * 104 * @return the detected encoding 105 */ 106 public String getEncoding() { 107 return encoding; 108 } 109 110 /** 111 * Return the default encoding. 112 * 113 * @return the default encoding 114 */ 115 public String getDefaultEncoding() { 116 return defaultEncoding; 117 } 118 119 /** 120 * Close the underlying writer. 121 * 122 * @throws IOException if an error occurs closing the underlying writer 123 */ 124 @Override 125 public void close() throws IOException { 126 if (writer == null) { 127 encoding = defaultEncoding; 128 writer = new OutputStreamWriter(out, encoding); 129 writer.write(xmlPrologWriter.toString()); 130 } 131 writer.close(); 132 } 133 134 /** 135 * Flush the underlying writer. 136 * 137 * @throws IOException if an error occurs flushing the underlying writer 138 */ 139 @Override 140 public void flush() throws IOException { 141 if (writer != null) { 142 writer.flush(); 143 } 144 } 145 146 /** 147 * Detect the encoding. 148 * 149 * @param cbuf the buffer to write the characters from 150 * @param off The start offset 151 * @param len The number of characters to write 152 * @throws IOException if an error occurs detecting the encoding 153 */ 154 private void detectEncoding(char[] cbuf, int off, int len) 155 throws IOException { 156 int size = len; 157 StringBuffer xmlProlog = xmlPrologWriter.getBuffer(); 158 if (xmlProlog.length() + len > BUFFER_SIZE) { 159 size = BUFFER_SIZE - xmlProlog.length(); 160 } 161 xmlPrologWriter.write(cbuf, off, size); 162 163 // try to determine encoding 164 if (xmlProlog.length() >= 5) { 165 if (xmlProlog.substring(0, 5).equals("<?xml")) { 166 // try to extract encoding from XML prolog 167 int xmlPrologEnd = xmlProlog.indexOf("?>"); 168 if (xmlPrologEnd > 0) { 169 // ok, full XML prolog written: let's extract encoding 170 Matcher m = ENCODING_PATTERN.matcher(xmlProlog.substring(0, 171 xmlPrologEnd)); 172 if (m.find()) { 173 encoding = m.group(1).toUpperCase(); 174 encoding = encoding.substring(1, encoding.length() - 1); 175 } else { 176 // no encoding found in XML prolog: using default 177 // encoding 178 encoding = defaultEncoding; 179 } 180 } else { 181 if (xmlProlog.length() >= BUFFER_SIZE) { 182 // no encoding found in first characters: using default 183 // encoding 184 encoding = defaultEncoding; 185 } 186 } 187 } else { 188 // no XML prolog: using default encoding 189 encoding = defaultEncoding; 190 } 191 if (encoding != null) { 192 // encoding has been chosen: let's do it 193 xmlPrologWriter = null; 194 writer = new OutputStreamWriter(out, encoding); 195 writer.write(xmlProlog.toString()); 196 if (len > size) { 197 writer.write(cbuf, off + size, len - size); 198 } 199 } 200 } 201 } 202 203 /** 204 * Write the characters to the underlying writer, detecing encoding. 205 * 206 * @param cbuf the buffer to write the characters from 207 * @param off The start offset 208 * @param len The number of characters to write 209 * @throws IOException if an error occurs detecting the encoding 210 */ 211 @Override 212 public void write(char[] cbuf, int off, int len) throws IOException { 213 if (xmlPrologWriter != null) { 214 detectEncoding(cbuf, off, len); 215 } else { 216 writer.write(cbuf, off, len); 217 } 218 } 219 220 static final Pattern ENCODING_PATTERN = XmlStreamReader.ENCODING_PATTERN; 221 }