001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 package org.apache.commons.io.output; 018 019 import java.io.File; 020 import java.io.FileNotFoundException; 021 import java.io.FileOutputStream; 022 import java.io.IOException; 023 import java.io.OutputStream; 024 import java.io.OutputStreamWriter; 025 import java.io.StringWriter; 026 import java.io.Writer; 027 import java.util.regex.Matcher; 028 import java.util.regex.Pattern; 029 030 import org.apache.commons.io.input.XmlStreamReader; 031 032 /** 033 * Character stream that handles all the necessary Voodo to figure out the 034 * charset encoding of the XML document written to the stream. 035 * 036 * @version $Id: XmlStreamWriter.java 1304052 2012-03-22 20:55:29Z ggregory $ 037 * @see XmlStreamReader 038 * @since 2.0 039 */ 040 public class XmlStreamWriter extends Writer { 041 private static final int BUFFER_SIZE = 4096; 042 043 private final OutputStream out; 044 045 private final String defaultEncoding; 046 047 private StringWriter xmlPrologWriter = new StringWriter(BUFFER_SIZE); 048 049 private Writer writer; 050 051 private String encoding; 052 053 /** 054 * Construct an new XML stream writer for the specified output stream 055 * with a default encoding of UTF-8. 056 * 057 * @param out The output stream 058 */ 059 public XmlStreamWriter(OutputStream out) { 060 this(out, null); 061 } 062 063 /** 064 * Construct an new XML stream writer for the specified output stream 065 * with the specified default encoding. 066 * 067 * @param out The output stream 068 * @param defaultEncoding The default encoding if not encoding could be detected 069 */ 070 public XmlStreamWriter(OutputStream out, String defaultEncoding) { 071 this.out = out; 072 this.defaultEncoding = defaultEncoding != null ? defaultEncoding : "UTF-8"; 073 } 074 075 /** 076 * Construct an new XML stream writer for the specified file 077 * with a default encoding of UTF-8. 078 * 079 * @param file The file to write to 080 * @throws FileNotFoundException if there is an error creating or 081 * opening the file 082 */ 083 public XmlStreamWriter(File file) throws FileNotFoundException { 084 this(file, null); 085 } 086 087 /** 088 * Construct an new XML stream writer for the specified file 089 * with the specified default encoding. 090 * 091 * @param file The file to write to 092 * @param defaultEncoding The default encoding if not encoding could be detected 093 * @throws FileNotFoundException if there is an error creating or 094 * opening the file 095 */ 096 public XmlStreamWriter(File file, String defaultEncoding) throws FileNotFoundException { 097 this(new FileOutputStream(file), defaultEncoding); 098 } 099 100 /** 101 * Return the detected encoding. 102 * 103 * @return the detected encoding 104 */ 105 public String getEncoding() { 106 return encoding; 107 } 108 109 /** 110 * Return the default encoding. 111 * 112 * @return the default encoding 113 */ 114 public String getDefaultEncoding() { 115 return defaultEncoding; 116 } 117 118 /** 119 * Close the underlying writer. 120 * 121 * @throws IOException if an error occurs closing the underlying writer 122 */ 123 @Override 124 public void close() throws IOException { 125 if (writer == null) { 126 encoding = defaultEncoding; 127 writer = new OutputStreamWriter(out, encoding); 128 writer.write(xmlPrologWriter.toString()); 129 } 130 writer.close(); 131 } 132 133 /** 134 * Flush the underlying writer. 135 * 136 * @throws IOException if an error occurs flushing the underlying writer 137 */ 138 @Override 139 public void flush() throws IOException { 140 if (writer != null) { 141 writer.flush(); 142 } 143 } 144 145 /** 146 * Detect the encoding. 147 * 148 * @param cbuf the buffer to write the characters from 149 * @param off The start offset 150 * @param len The number of characters to write 151 * @throws IOException if an error occurs detecting the encoding 152 */ 153 private void detectEncoding(char[] cbuf, int off, int len) 154 throws IOException { 155 int size = len; 156 StringBuffer xmlProlog = xmlPrologWriter.getBuffer(); 157 if (xmlProlog.length() + len > BUFFER_SIZE) { 158 size = BUFFER_SIZE - xmlProlog.length(); 159 } 160 xmlPrologWriter.write(cbuf, off, size); 161 162 // try to determine encoding 163 if (xmlProlog.length() >= 5) { 164 if (xmlProlog.substring(0, 5).equals("<?xml")) { 165 // try to extract encoding from XML prolog 166 int xmlPrologEnd = xmlProlog.indexOf("?>"); 167 if (xmlPrologEnd > 0) { 168 // ok, full XML prolog written: let's extract encoding 169 Matcher m = ENCODING_PATTERN.matcher(xmlProlog.substring(0, 170 xmlPrologEnd)); 171 if (m.find()) { 172 encoding = m.group(1).toUpperCase(); 173 encoding = encoding.substring(1, encoding.length() - 1); 174 } else { 175 // no encoding found in XML prolog: using default 176 // encoding 177 encoding = defaultEncoding; 178 } 179 } else { 180 if (xmlProlog.length() >= BUFFER_SIZE) { 181 // no encoding found in first characters: using default 182 // encoding 183 encoding = defaultEncoding; 184 } 185 } 186 } else { 187 // no XML prolog: using default encoding 188 encoding = defaultEncoding; 189 } 190 if (encoding != null) { 191 // encoding has been chosen: let's do it 192 xmlPrologWriter = null; 193 writer = new OutputStreamWriter(out, encoding); 194 writer.write(xmlProlog.toString()); 195 if (len > size) { 196 writer.write(cbuf, off + size, len - size); 197 } 198 } 199 } 200 } 201 202 /** 203 * Write the characters to the underlying writer, detecing encoding. 204 * 205 * @param cbuf the buffer to write the characters from 206 * @param off The start offset 207 * @param len The number of characters to write 208 * @throws IOException if an error occurs detecting the encoding 209 */ 210 @Override 211 public void write(char[] cbuf, int off, int len) throws IOException { 212 if (xmlPrologWriter != null) { 213 detectEncoding(cbuf, off, len); 214 } else { 215 writer.write(cbuf, off, len); 216 } 217 } 218 219 static final Pattern ENCODING_PATTERN = XmlStreamReader.ENCODING_PATTERN; 220 }