/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.io.output;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.StringWriter;
import java.io.Writer;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Locale;
import java.util.Objects;
import java.util.regex.Matcher;

import org.apache.commons.io.Charsets;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.build.AbstractStreamBuilder;
import org.apache.commons.io.input.XmlStreamReader;

/**
 * Character stream that handles all the necessary work to figure out the charset encoding of the XML document written to the stream.
 * <p>
 * Characters are buffered in memory until the encoding can be decided: either the XML declaration ({@code <?xml ... ?>}) has been
 * written completely, the input turns out not to start with an XML declaration, or the buffer limit is reached. From then on, all
 * characters are encoded with the chosen charset and written to the underlying {@link OutputStream}.
 * </p>
 * <p>
 * To build an instance, use {@link Builder}.
 * </p>
 *
 * @see Builder
 * @see XmlStreamReader
 * @since 2.0
 */
public class XmlStreamWriter extends Writer {

    // @formatter:off
    /**
     * Builds a new {@link XmlStreamWriter}.
     *
     * <p>
     * For example:
     * </p>
     * <pre>{@code
     * XmlStreamWriter w = XmlStreamWriter.builder()
     *   .setPath(path)
     *   .setCharset(StandardCharsets.UTF_8)
     *   .get();}
     * </pre>
     *
     * @see #get()
     * @since 2.12.0
     */
    // @formatter:on
    public static class Builder extends AbstractStreamBuilder<XmlStreamWriter, Builder> {

        /**
         * Constructs a new builder of {@link XmlStreamWriter}, using UTF-8 as both the default charset and the initial charset.
         */
        public Builder() {
            setCharsetDefault(StandardCharsets.UTF_8);
            setCharset(StandardCharsets.UTF_8);
        }

        /**
         * Builds a new {@link XmlStreamWriter}.
         * <p>
         * You must set an aspect that supports {@link #getOutputStream()} on this builder, otherwise, this method throws an exception.
         * </p>
         * <p>
         * This builder uses the following aspects:
         * </p>
         * <ul>
         * <li>{@link #getOutputStream()}</li>
         * <li>{@link #getCharset()}</li>
         * </ul>
         *
         * @return a new instance.
         * @throws IllegalStateException         if the {@code origin} is {@code null}.
         * @throws UnsupportedOperationException if the origin cannot be converted to an {@link OutputStream}.
         * @throws IOException                   if an I/O error occurs converting to an {@link OutputStream} using {@link #getOutputStream()}.
         * @see #getOutputStream()
         * @see #getUnchecked()
         */
        @Override
        public XmlStreamWriter get() throws IOException {
            return new XmlStreamWriter(this);
        }

    }

    /** Maximum number of characters buffered while waiting for the XML declaration to complete. */
    private static final int BUFFER_SIZE = IOUtils.DEFAULT_BUFFER_SIZE;

    /** The character sequence that opens an XML declaration. */
    private static final String XML_DECL_START = "<?xml";

    /**
     * Constructs a new {@link Builder}.
     *
     * @return a new {@link Builder}.
     * @since 2.12.0
     */
    public static Builder builder() {
        return new Builder();
    }

    /** The underlying byte stream. */
    private final OutputStream out;

    /** The charset used when no encoding can be extracted from the XML declaration. */
    private final Charset defaultCharset;

    /** Buffers the first characters written; set to {@code null} once the encoding has been chosen while writing. */
    private StringWriter prologWriter = new StringWriter(BUFFER_SIZE);

    /** The encoding writer; {@code null} until the encoding has been chosen. */
    private Writer writer;

    /** The chosen charset; {@code null} until the encoding has been chosen. */
    private Charset charset;

    @SuppressWarnings("resource") // caller closes.
    private XmlStreamWriter(final Builder builder) throws IOException {
        this(builder.getOutputStream(), builder.getCharset());
    }

    /**
     * Constructs a new XML stream writer for the specified file
     * with a default encoding of UTF-8.
     *
     * @param file The file to write to
     * @throws FileNotFoundException if there is an error creating or
     *         opening the file
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public XmlStreamWriter(final File file) throws FileNotFoundException {
        this(file, null);
    }

    /**
     * Constructs a new XML stream writer for the specified file
     * with the specified default encoding.
     *
     * @param file The file to write to
     * @param defaultEncoding The default encoding if no encoding could be detected
     * @throws FileNotFoundException if there is an error creating or
     *         opening the file
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    @SuppressWarnings("resource")
    public XmlStreamWriter(final File file, final String defaultEncoding) throws FileNotFoundException {
        // NOTE(review): the file stream is opened before the encoding name is resolved by the delegate constructor;
        // if that resolution can throw, the stream is not closed here - confirm against Charsets.toCharset.
        this(new FileOutputStream(file), defaultEncoding);
    }

    /**
     * Constructs a new XML stream writer for the specified output stream
     * with a default encoding of UTF-8.
     *
     * @param out The output stream
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public XmlStreamWriter(final OutputStream out) {
        this(out, StandardCharsets.UTF_8);
    }

    /**
     * Constructs a new XML stream writer for the specified output stream
     * with the specified default encoding.
     *
     * @param out The output stream
     * @param defaultEncoding The default encoding if no encoding could be detected, must not be {@code null}
     * @throws NullPointerException if {@code defaultEncoding} is {@code null}
     */
    private XmlStreamWriter(final OutputStream out, final Charset defaultEncoding) {
        this.out = out;
        this.defaultCharset = Objects.requireNonNull(defaultEncoding);
    }

    /**
     * Constructs a new XML stream writer for the specified output stream
     * with the specified default encoding.
     *
     * @param out The output stream
     * @param defaultEncoding The default encoding if no encoding could be detected
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public XmlStreamWriter(final OutputStream out, final String defaultEncoding) {
        this(out, Charsets.toCharset(defaultEncoding, StandardCharsets.UTF_8));
    }

    /**
     * Closes the underlying writer.
     * <p>
     * If the encoding has not been chosen yet, the default encoding is used and the characters buffered so far are
     * written before closing.
     * </p>
     *
     * @throws IOException if an error occurs closing the underlying writer
     */
    @Override
    public void close() throws IOException {
        if (writer == null) {
            // Nothing was ever flushed to the stream: fall back to the default encoding for the buffered characters.
            charset = defaultCharset;
            writer = new OutputStreamWriter(out, charset);
            writer.write(prologWriter.toString());
        }
        writer.close();
    }

    /**
     * Buffers the given characters and tries to choose the encoding.
     * <p>
     * The encoding is chosen as soon as one of the following holds for the buffered characters:
     * </p>
     * <ul>
     * <li>they do not start with {@code <?xml}: the default encoding is used;</li>
     * <li>they contain the end of the XML declaration ({@code ?>}): the declared encoding is used, or the default
     * encoding if none is declared;</li>
     * <li>the buffer is full and the XML declaration is still not terminated: the default encoding is used.</li>
     * </ul>
     * <p>
     * Once the encoding is chosen, the buffered characters and any remaining input are written to a new
     * {@link OutputStreamWriter} and buffering stops.
     * </p>
     *
     * @param cbuf the buffer to write the characters from
     * @param off The start offset
     * @param len The number of characters to write
     * @throws IOException if an error occurs detecting the encoding
     * @throws java.nio.charset.IllegalCharsetNameException if the declared encoding name is illegal
     * @throws java.nio.charset.UnsupportedCharsetException if the declared encoding is not supported
     */
    private void detectEncoding(final char[] cbuf, final int off, final int len) throws IOException {
        final StringBuffer xmlProlog = prologWriter.getBuffer();
        // Never buffer more than BUFFER_SIZE characters; anything beyond that is written straight through below.
        final int size = Math.min(len, BUFFER_SIZE - xmlProlog.length());
        prologWriter.write(cbuf, off, size);

        // Too few characters to tell whether the document starts with an XML declaration: keep buffering.
        if (xmlProlog.length() < XML_DECL_START.length()) {
            return;
        }

        // try to determine encoding
        if (xmlProlog.substring(0, XML_DECL_START.length()).equals(XML_DECL_START)) {
            // try to extract encoding from XML prolog
            final int xmlPrologEnd = xmlProlog.indexOf("?>");
            if (xmlPrologEnd > 0) {
                // ok, full XML prolog written: let's extract encoding
                final Matcher m = XmlStreamReader.ENCODING_PATTERN.matcher(xmlProlog.substring(0, xmlPrologEnd));
                if (m.find()) {
                    final String encName = m.group(1).toUpperCase(Locale.ROOT);
                    // Drop the first and last character of the match (presumably the surrounding quotes;
                    // ENCODING_PATTERN is defined in XmlStreamReader).
                    charset = Charset.forName(encName.substring(1, encName.length() - 1));
                } else {
                    // no encoding found in XML prolog: using default encoding
                    charset = defaultCharset;
                }
            } else if (xmlProlog.length() >= BUFFER_SIZE) {
                // no encoding found in first characters: using default encoding
                charset = defaultCharset;
            }
        } else {
            // no XML prolog: using default encoding
            charset = defaultCharset;
        }

        if (charset != null) {
            // encoding has been chosen: let's do it
            prologWriter = null;
            writer = new OutputStreamWriter(out, charset);
            writer.write(xmlProlog.toString());
            if (len > size) {
                // Write the part of the input that did not fit into the prolog buffer.
                writer.write(cbuf, off + size, len - size);
            }
        }
    }

    /**
     * Flushes the underlying writer.
     * <p>
     * This is a no-op while the encoding has not been chosen yet; buffered characters stay in memory.
     * </p>
     *
     * @throws IOException if an error occurs flushing the underlying writer
     */
    @Override
    public void flush() throws IOException {
        if (writer != null) {
            writer.flush();
        }
    }

    /**
     * Returns the default encoding.
     *
     * @return the default encoding
     */
    public String getDefaultEncoding() {
        return defaultCharset.name();
    }

    /**
     * Returns the detected encoding.
     *
     * @return the detected encoding, or {@code null} if the encoding has not been chosen yet
     */
    public String getEncoding() {
        // The charset is only assigned once detection completes or the writer is closed.
        return charset != null ? charset.name() : null;
    }

    /**
     * Writes the characters to the underlying writer, detecting encoding.
     *
     * @param cbuf the buffer to write the characters from
     * @param off The start offset
     * @param len The number of characters to write
     * @throws IOException if an error occurs detecting the encoding
     */
    @Override
    public void write(final char[] cbuf, final int off, final int len) throws IOException {
        if (prologWriter != null) {
            // Still buffering: the encoding has not been chosen yet.
            detectEncoding(cbuf, off, len);
        } else {
            writer.write(cbuf, off, len);
        }
    }
}