/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.io.output;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.StringWriter;
import java.io.Writer;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.io.input.XmlStreamReader;

/**
 * Character stream that handles all the necessary voodoo to figure out the
 * charset encoding of the XML document written to the stream.
 * <p>
 * Written characters are held back in an in-memory buffer (at most
 * {@code BUFFER_SIZE} characters) until the encoding has been chosen: either
 * the one declared in the XML prolog, or the default encoding when no prolog
 * or no encoding declaration is found. Once chosen, the buffered characters
 * are written to the output stream using that encoding and all further
 * writes go straight through.
 *
 * @version $Id: XmlStreamWriter.java 1415850 2012-11-30 20:51:39Z ggregory $
 * @see XmlStreamReader
 * @since 2.0
 */
public class XmlStreamWriter extends Writer {

    /** Maximum number of characters buffered while looking for the XML prolog. */
    private static final int BUFFER_SIZE = 4096;

    /** The stream that receives the encoded bytes. */
    private final OutputStream out;

    /** The encoding used when none can be detected; never {@code null}. */
    private final String defaultEncoding;

    /**
     * Holds the first characters written until the encoding is chosen;
     * {@code null} once {@link #writer} has been created by the detection.
     */
    private StringWriter xmlPrologWriter = new StringWriter(BUFFER_SIZE);

    /** The encoding writer; {@code null} until the encoding has been chosen. */
    private Writer writer;

    /** The chosen encoding; {@code null} until it has been chosen. */
    private String encoding;

    /**
     * Constructs a new XML stream writer for the specified output stream
     * with a default encoding of UTF-8.
     *
     * @param out The output stream
     */
    public XmlStreamWriter(final OutputStream out) {
        this(out, null);
    }

    /**
     * Constructs a new XML stream writer for the specified output stream
     * with the specified default encoding.
     *
     * @param out The output stream
     * @param defaultEncoding The default encoding if no encoding could be
     *        detected; {@code null} selects UTF-8
     */
    public XmlStreamWriter(final OutputStream out, final String defaultEncoding) {
        this.out = out;
        this.defaultEncoding = defaultEncoding != null ? defaultEncoding : "UTF-8";
    }

    /**
     * Constructs a new XML stream writer for the specified file
     * with a default encoding of UTF-8.
     *
     * @param file The file to write to
     * @throws FileNotFoundException if there is an error creating or
     *         opening the file
     */
    public XmlStreamWriter(final File file) throws FileNotFoundException {
        this(file, null);
    }

    /**
     * Constructs a new XML stream writer for the specified file
     * with the specified default encoding.
     *
     * @param file The file to write to
     * @param defaultEncoding The default encoding if no encoding could be
     *        detected; {@code null} selects UTF-8
     * @throws FileNotFoundException if there is an error creating or
     *         opening the file
     */
    public XmlStreamWriter(final File file, final String defaultEncoding) throws FileNotFoundException {
        this(new FileOutputStream(file), defaultEncoding);
    }

    /**
     * Returns the detected encoding.
     *
     * @return the detected encoding, or {@code null} if no encoding has been
     *         chosen yet
     */
    public String getEncoding() {
        return encoding;
    }

    /**
     * Returns the default encoding.
     *
     * @return the default encoding
     */
    public String getDefaultEncoding() {
        return defaultEncoding;
    }

    /**
     * Closes the underlying writer.
     * <p>
     * If no encoding has been chosen yet, the default encoding is used and
     * the characters buffered so far are written before closing.
     *
     * @throws IOException if an error occurs closing the underlying writer
     */
    @Override
    public void close() throws IOException {
        if (writer == null) {
            // Detection never completed: fall back to the default encoding
            // and emit whatever was buffered.
            encoding = defaultEncoding;
            writer = new OutputStreamWriter(out, encoding);
            writer.write(xmlPrologWriter.toString());
        }
        writer.close();
    }

    /**
     * Flushes the underlying writer.
     * <p>
     * Does nothing while the encoding is still undetermined, because the
     * buffered characters cannot be encoded yet.
     *
     * @throws IOException if an error occurs flushing the underlying writer
     */
    @Override
    public void flush() throws IOException {
        if (writer != null) {
            writer.flush();
        }
    }

    /**
     * Buffers the given characters and tries to detect the encoding.
     * <p>
     * When the encoding can be chosen, the underlying writer is created and
     * receives the buffered characters followed by any characters of this
     * call that did not fit into the buffer.
     *
     * @param cbuf the buffer to write the characters from
     * @param off The start offset
     * @param len The number of characters to write
     * @throws IOException if an error occurs detecting the encoding
     */
    private void detectEncoding(final char[] cbuf, final int off, final int len)
            throws IOException {
        final StringBuffer xmlProlog = xmlPrologWriter.getBuffer();

        // Never buffer more than BUFFER_SIZE characters in total.
        int size = len;
        if (xmlProlog.length() + len > BUFFER_SIZE) {
            size = BUFFER_SIZE - xmlProlog.length();
        }
        xmlPrologWriter.write(cbuf, off, size);

        // At least 5 characters are needed to recognise "<?xml".
        if (xmlProlog.length() < 5) {
            return;
        }

        if (xmlProlog.substring(0, 5).equals("<?xml")) {
            // try to extract encoding from XML prolog
            final int xmlPrologEnd = xmlProlog.indexOf("?>");
            if (xmlPrologEnd > 0) {
                // ok, full XML prolog written: let's extract encoding
                final Matcher m = ENCODING_PATTERN.matcher(xmlProlog.substring(0,
                        xmlPrologEnd));
                if (m.find()) {
                    // Locale.ROOT keeps the charset name intact regardless of
                    // the default locale (e.g. Turkish dotted/dotless i).
                    final String quoted = m.group(1).toUpperCase(Locale.ROOT);
                    // Drop the first and last character of the match, which
                    // are expected to be the quotes around the value.
                    encoding = quoted.substring(1, quoted.length() - 1);
                } else {
                    // no encoding found in XML prolog: using default encoding
                    encoding = defaultEncoding;
                }
            } else if (xmlProlog.length() >= BUFFER_SIZE) {
                // no encoding found in first characters: using default encoding
                encoding = defaultEncoding;
            }
        } else {
            // no XML prolog: using default encoding
            encoding = defaultEncoding;
        }

        if (encoding != null) {
            // encoding has been chosen: let's do it.
            // Create the writer before discarding the prolog buffer so that
            // an unsupported encoding name leaves this object in a state
            // where close() can still fall back to the default encoding.
            writer = new OutputStreamWriter(out, encoding);
            xmlPrologWriter = null;
            writer.write(xmlProlog.toString());
            if (len > size) {
                // Characters of this call that did not fit into the buffer.
                writer.write(cbuf, off + size, len - size);
            }
        }
    }

    /**
     * Writes the characters to the underlying writer, detecting the encoding
     * first if it has not been chosen yet.
     *
     * @param cbuf the buffer to write the characters from
     * @param off The start offset
     * @param len The number of characters to write
     * @throws IOException if an error occurs detecting the encoding
     */
    @Override
    public void write(final char[] cbuf, final int off, final int len) throws IOException {
        if (xmlPrologWriter != null) {
            detectEncoding(cbuf, off, len);
        } else {
            writer.write(cbuf, off, len);
        }
    }

    /** Pattern used to find the encoding declaration; shared with {@link XmlStreamReader}. */
    static final Pattern ENCODING_PATTERN = XmlStreamReader.ENCODING_PATTERN;
}