001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.io.input; 018 019import static org.junit.Assert.assertEquals; 020import static org.junit.Assert.assertTrue; 021import static org.junit.Assert.fail; 022 023import java.io.ByteArrayInputStream; 024import java.io.ByteArrayOutputStream; 025import java.io.IOException; 026import java.io.InputStream; 027import java.io.OutputStreamWriter; 028import java.io.Writer; 029import java.text.MessageFormat; 030import java.util.HashMap; 031import java.util.Map; 032 033import org.apache.commons.io.IOUtils; 034import org.junit.Test; 035 036/** 037 */ 038public class XmlStreamReaderTest { 039 private static final String XML5 = "xml-prolog-encoding-spaced-single-quotes"; 040 private static final String XML4 = "xml-prolog-encoding-single-quotes"; 041 private static final String XML3 = "xml-prolog-encoding-double-quotes"; 042 private static final String XML2 = "xml-prolog"; 043 private static final String XML1 = "xml"; 044 045 protected void _testRawNoBomValid(final String encoding) throws Exception { 046 InputStream is = getXmlStream("no-bom", XML1, encoding, encoding); 047 XmlStreamReader xmlReader = new XmlStreamReader(is, false); 048 assertEquals(xmlReader.getEncoding(), "UTF-8"); 049 xmlReader.close(); 050 051 is = getXmlStream("no-bom", XML2, encoding, encoding); 052 xmlReader = new XmlStreamReader(is); 053 assertEquals(xmlReader.getEncoding(), "UTF-8"); 054 xmlReader.close(); 055 056 is = getXmlStream("no-bom", XML3, encoding, encoding); 057 xmlReader = new XmlStreamReader(is); 058 assertEquals(xmlReader.getEncoding(), encoding); 059 xmlReader.close(); 060 061 is = getXmlStream("no-bom", XML4, encoding, encoding); 062 xmlReader = new XmlStreamReader(is); 063 assertEquals(xmlReader.getEncoding(), encoding); 064 xmlReader.close(); 065 066 is = getXmlStream("no-bom", XML5, encoding, encoding); 067 xmlReader = new XmlStreamReader(is); 068 assertEquals(xmlReader.getEncoding(), encoding); 069 xmlReader.close(); 070 } 071 072 protected void _testRawNoBomInvalid(final String encoding) throws Exception { 073 final InputStream is = getXmlStream("no-bom", XML3, encoding, encoding); 074 try { 075 new XmlStreamReader(is, false); 076 fail("It should have failed"); 077 } catch (final IOException ex) { 078 assertTrue(ex.getMessage().indexOf("Invalid encoding,") > -1); 079 } 080 } 081 082 @Test 083 public void testRawNoBomUsAscii() throws Exception { 084 _testRawNoBomValid("US-ASCII"); 085 } 086 087 @Test 088 public void testRawNoBomUtf8() throws Exception { 089 _testRawNoBomValid("UTF-8"); 090 } 091 092 @Test 093 public void testRawNoBomUtf16BE() throws Exception { 094 _testRawNoBomValid("UTF-16BE"); 095 } 096 097 @Test 098 public void testRawNoBomUtf16LE() throws Exception { 099 _testRawNoBomValid("UTF-16LE"); 100 } 101 102 @Test 103 public void testRawNoBomUtf32BE() throws Exception { 104 _testRawNoBomValid("UTF-32BE"); 105 } 106 107 @Test 108 public void testRawNoBomUtf32LE() throws Exception { 109 _testRawNoBomValid("UTF-32LE"); 110 } 111 112 @Test 113 public void testRawNoBomIso8859_1() throws Exception { 114 _testRawNoBomValid("ISO-8859-1"); 115 } 116 117 @Test 118 public void testRawNoBomCp1047() throws Exception { 119 _testRawNoBomValid("CP1047"); 120 } 121 122 protected void _testRawBomValid(final String encoding) throws Exception { 123 final InputStream is = getXmlStream(encoding + "-bom", XML3, encoding, 124 encoding); 125 final XmlStreamReader xmlReader = new XmlStreamReader(is, false); 126 if (!encoding.equals("UTF-16") && !encoding.equals("UTF-32")) { 127 assertEquals(xmlReader.getEncoding(), encoding); 128 } else { 129 assertEquals(xmlReader.getEncoding() 130 .substring(0, encoding.length()), encoding); 131 } 132 xmlReader.close(); 133 } 134 135 protected void _testRawBomInvalid(final String bomEnc, final String streamEnc, 136 final String prologEnc) throws Exception { 137 final InputStream is = getXmlStream(bomEnc, XML3, streamEnc, prologEnc); 138 XmlStreamReader xmlReader = null; 139 try { 140 xmlReader = new XmlStreamReader(is, false); 141 final String foundEnc = xmlReader.getEncoding(); 142 fail("Expected IOException for BOM " + bomEnc + ", streamEnc " 143 + streamEnc + " and prologEnc " + prologEnc + ": found " 144 + foundEnc); 145 } catch (final IOException ex) { 146 assertTrue(ex.getMessage().indexOf("Invalid encoding,") > -1); 147 } 148 if (xmlReader != null) { 149 xmlReader.close(); 150 } 151 } 152 153 @Test 154 public void testRawBomUtf8() throws Exception { 155 _testRawBomValid("UTF-8"); 156 _testRawBomInvalid("UTF-8-bom", "US-ASCII", "US-ASCII"); 157 _testRawBomInvalid("UTF-8-bom", "ISO-8859-1", "ISO-8859-1"); 158 _testRawBomInvalid("UTF-8-bom", "UTF-8", "UTF-16"); 159 _testRawBomInvalid("UTF-8-bom", "UTF-8", "UTF-16BE"); 160 _testRawBomInvalid("UTF-8-bom", "UTF-8", "UTF-16LE"); 161 _testRawBomInvalid("UTF-16BE-bom", "UTF-16BE", "UTF-16LE"); 162 _testRawBomInvalid("UTF-16LE-bom", "UTF-16LE", "UTF-16BE"); 163 _testRawBomInvalid("UTF-16LE-bom", "UTF-16LE", "UTF-8"); 164 _testRawBomInvalid("UTF-32BE-bom", "UTF-32BE", "UTF-32LE"); 165 _testRawBomInvalid("UTF-32LE-bom", "UTF-32LE", "UTF-32BE"); 166 _testRawBomInvalid("UTF-32LE-bom", "UTF-32LE", "UTF-8"); 167 } 168 169 @Test 170 public void testRawBomUtf16() throws Exception { 171 _testRawBomValid("UTF-16BE"); 172 _testRawBomValid("UTF-16LE"); 173 _testRawBomValid("UTF-16"); 174 175 _testRawBomInvalid("UTF-16BE-bom", "UTF-16BE", "UTF-16LE"); 176 _testRawBomInvalid("UTF-16LE-bom", "UTF-16LE", "UTF-16BE"); 177 _testRawBomInvalid("UTF-16LE-bom", "UTF-16LE", "UTF-8"); 178 } 179 180 @Test 181 public void testRawBomUtf32() throws Exception { 182 _testRawBomValid("UTF-32BE"); 183 _testRawBomValid("UTF-32LE"); 184 _testRawBomValid("UTF-32"); 185 186 _testRawBomInvalid("UTF-32BE-bom", "UTF-32BE", "UTF-32LE"); 187 _testRawBomInvalid("UTF-32LE-bom", "UTF-32LE", "UTF-32BE"); 188 _testRawBomInvalid("UTF-32LE-bom", "UTF-32LE", "UTF-8"); 189} 190 191 192 @Test 193 public void testHttp() throws Exception { 194 // niallp 2010-10-06 - remove following 2 tests - I reinstated 195 // checks for non-UTF-16 encodings (18 tests) and these failed 196 // _testHttpValid("application/xml", "no-bom", "US-ASCII", null); 197 // _testHttpValid("application/xml", "UTF-8-bom", "US-ASCII", null); 198 _testHttpValid("application/xml", "UTF-8-bom", "UTF-8", null); 199 _testHttpValid("application/xml", "UTF-8-bom", "UTF-8", "UTF-8"); 200 _testHttpValid("application/xml;charset=UTF-8", "UTF-8-bom", "UTF-8", null); 201 _testHttpValid("application/xml;charset=\"UTF-8\"", "UTF-8-bom", "UTF-8", null); 202 _testHttpValid("application/xml;charset='UTF-8'", "UTF-8-bom", "UTF-8", null); 203 _testHttpValid("application/xml;charset=UTF-8", "UTF-8-bom", "UTF-8", "UTF-8"); 204 _testHttpValid("application/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", null); 205 _testHttpValid("application/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", "UTF-16"); 206 _testHttpValid("application/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", "UTF-16BE"); 207 208 _testHttpInvalid("application/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", null); 209 _testHttpInvalid("application/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", "UTF-16"); 210 _testHttpInvalid("application/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", "UTF-16BE"); 211 212 _testHttpInvalid("application/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", null); 213 _testHttpInvalid("application/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", "UTF-32"); 214 _testHttpInvalid("application/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", "UTF-32BE"); 215 216 _testHttpInvalid("application/xml", "UTF-8-bom", "US-ASCII", "US-ASCII"); 217 _testHttpInvalid("application/xml;charset=UTF-16", "UTF-16LE", "UTF-8", "UTF-8"); 218 _testHttpInvalid("application/xml;charset=UTF-16", "no-bom", "UTF-16BE", "UTF-16BE"); 219 _testHttpInvalid("application/xml;charset=UTF-32", "UTF-32LE", "UTF-8", "UTF-8"); 220 _testHttpInvalid("application/xml;charset=UTF-32", "no-bom", "UTF-32BE", "UTF-32BE"); 221 222 _testHttpValid("text/xml", "no-bom", "US-ASCII", null); 223 _testHttpValid("text/xml;charset=UTF-8", "UTF-8-bom", "UTF-8", "UTF-8"); 224 _testHttpValid("text/xml;charset=UTF-8", "UTF-8-bom", "UTF-8", null); 225 _testHttpValid("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", null); 226 _testHttpValid("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", "UTF-16"); 227 _testHttpValid("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", "UTF-16BE"); 228 _testHttpValid("text/xml;charset=UTF-32", "UTF-32BE-bom", "UTF-32BE", null); 229 _testHttpValid("text/xml;charset=UTF-32", "UTF-32BE-bom", "UTF-32BE", "UTF-32"); 230 _testHttpValid("text/xml;charset=UTF-32", "UTF-32BE-bom", "UTF-32BE", "UTF-32BE"); 231 _testHttpValid("text/xml", "UTF-8-bom", "US-ASCII", null); 232 233 _testAlternateDefaultEncoding("application/xml", "UTF-8-bom", "UTF-8", null, null); 234 _testAlternateDefaultEncoding("application/xml", "no-bom", "US-ASCII", null, "US-ASCII"); 235 _testAlternateDefaultEncoding("application/xml", "UTF-8-bom", "UTF-8", null, "UTF-8"); 236 _testAlternateDefaultEncoding("text/xml", "no-bom", "US-ASCII", null, null); 237 _testAlternateDefaultEncoding("text/xml", "no-bom", "US-ASCII", null, "US-ASCII"); 238 _testAlternateDefaultEncoding("text/xml", "no-bom", "US-ASCII", null, "UTF-8"); 239 240 _testHttpInvalid("text/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", null); 241 _testHttpInvalid("text/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", "UTF-16"); 242 _testHttpInvalid("text/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", "UTF-16BE"); 243 _testHttpInvalid("text/xml;charset=UTF-16", "no-bom", "UTF-16BE", "UTF-16BE"); 244 _testHttpInvalid("text/xml;charset=UTF-16", "no-bom", "UTF-16BE", null); 245 246 _testHttpInvalid("text/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", null); 247 _testHttpInvalid("text/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", "UTF-32"); 248 _testHttpInvalid("text/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", "UTF-32BE"); 249 _testHttpInvalid("text/xml;charset=UTF-32", "no-bom", "UTF-32BE", "UTF-32BE"); 250 _testHttpInvalid("text/xml;charset=UTF-32", "no-bom", "UTF-32BE", null); 251 252 _testHttpLenient("text/xml", "no-bom", "US-ASCII", null, "US-ASCII"); 253 _testHttpLenient("text/xml;charset=UTF-8", "UTF-8-bom", "UTF-8", "UTF-8", "UTF-8"); 254 _testHttpLenient("text/xml;charset=UTF-8", "UTF-8-bom", "UTF-8", null, "UTF-8"); 255 _testHttpLenient("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", null, "UTF-16BE"); 256 _testHttpLenient("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", "UTF-16", "UTF-16"); 257 _testHttpLenient("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", "UTF-16BE", "UTF-16BE"); 258 _testHttpLenient("text/xml;charset=UTF-32", "UTF-32BE-bom", "UTF-32BE", null, "UTF-32BE"); 259 _testHttpLenient("text/xml;charset=UTF-32", "UTF-32BE-bom", "UTF-32BE", "UTF-32", "UTF-32"); 260 _testHttpLenient("text/xml;charset=UTF-32", "UTF-32BE-bom", "UTF-32BE", "UTF-32BE", "UTF-32BE"); 261 _testHttpLenient("text/xml", "UTF-8-bom", "US-ASCII", null, "US-ASCII"); 262 263 _testHttpLenient("text/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", null, "UTF-16BE"); 264 _testHttpLenient("text/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", "UTF-16", "UTF-16"); 265 _testHttpLenient("text/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", "UTF-16BE", "UTF-16BE"); 266 _testHttpLenient("text/xml;charset=UTF-16", "no-bom", "UTF-16BE", "UTF-16BE", "UTF-16BE"); 267 _testHttpLenient("text/xml;charset=UTF-16", "no-bom", "UTF-16BE", null, "UTF-16"); 268 269 _testHttpLenient("text/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", null, "UTF-32BE"); 270 _testHttpLenient("text/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", "UTF-32", "UTF-32"); 271 _testHttpLenient("text/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", "UTF-32BE", "UTF-32BE"); 272 _testHttpLenient("text/xml;charset=UTF-32", "no-bom", "UTF-32BE", "UTF-32BE", "UTF-32BE"); 273 _testHttpLenient("text/xml;charset=UTF-32", "no-bom", "UTF-32BE", null, "UTF-32"); 274 275 _testHttpLenient("text/html", "no-bom", "US-ASCII", "US-ASCII", "US-ASCII"); 276 _testHttpLenient("text/html", "no-bom", "US-ASCII", null, "US-ASCII"); 277 _testHttpLenient("text/html;charset=UTF-8", "no-bom", "US-ASCII", "UTF-8", "UTF-8"); 278 _testHttpLenient("text/html;charset=UTF-16BE", "no-bom", "US-ASCII", "UTF-8", "UTF-8"); 279 _testHttpLenient("text/html;charset=UTF-32BE", "no-bom", "US-ASCII", "UTF-8", "UTF-8"); 280 } 281 282 @Test 283 public void testRawContent() throws Exception { 284 final String encoding = "UTF-8"; 285 final String xml = getXML("no-bom", XML3, encoding, encoding); 286 final ByteArrayInputStream is = new ByteArrayInputStream(xml.getBytes(encoding)); 287 final XmlStreamReader xmlReader = new XmlStreamReader(is); 288 assertEquals("Check encoding", xmlReader.getEncoding(), encoding); 289 assertEquals("Check content", xml, IOUtils.toString(xmlReader)); 290 } 291 292 @Test 293 public void testHttpContent() throws Exception { 294 final String encoding = "UTF-8"; 295 final String xml = getXML("no-bom", XML3, encoding, encoding); 296 final ByteArrayInputStream is = new ByteArrayInputStream(xml.getBytes(encoding)); 297 final XmlStreamReader xmlReader = new XmlStreamReader(is, encoding); 298 assertEquals("Check encoding", xmlReader.getEncoding(), encoding); 299 assertEquals("Check content", xml, IOUtils.toString(xmlReader)); 300 } 301 302 public void _testAlternateDefaultEncoding(final String cT, final String bomEnc, 303 final String streamEnc, final String prologEnc, final String alternateEnc) 304 throws Exception { 305 final InputStream is = getXmlStream(bomEnc, prologEnc == null ? XML1 306 : XML3, streamEnc, prologEnc); 307 final XmlStreamReader xmlReader = new XmlStreamReader(is, cT, false, alternateEnc); 308 if (!streamEnc.equals("UTF-16")) { 309 // we can not assert things here because UTF-8, US-ASCII and 310 // ISO-8859-1 look alike for the chars used for detection 311 // (niallp 2010-10-06 - I re-instated the check below - the tests(6) passed) 312 final String enc = alternateEnc != null ? alternateEnc : streamEnc; 313 assertEquals(xmlReader.getEncoding(), enc); 314 } else { 315 //String enc = (alternateEnc != null) ? alternateEnc : streamEnc; 316 assertEquals(xmlReader.getEncoding().substring(0, 317 streamEnc.length()), streamEnc); 318 } 319 xmlReader.close(); 320 } 321 322 public void _testHttpValid(final String cT, final String bomEnc, final String streamEnc, 323 final String prologEnc) throws Exception { 324 final InputStream is = getXmlStream(bomEnc, 325 prologEnc == null ? XML1 : XML3, streamEnc, prologEnc); 326 final XmlStreamReader xmlReader = new XmlStreamReader(is, cT, false); 327 if (!streamEnc.equals("UTF-16")) { 328 // we can not assert things here because UTF-8, US-ASCII and 329 // ISO-8859-1 look alike for the chars used for detection 330 // (niallp 2010-10-06 - I re-instated the check below and removed the 2 tests that failed) 331 assertEquals(xmlReader.getEncoding(), streamEnc); 332 } else { 333 assertEquals(xmlReader.getEncoding().substring(0, 334 streamEnc.length()), streamEnc); 335 } 336 xmlReader.close(); 337 } 338 339 protected void _testHttpInvalid(final String cT, final String bomEnc, final String streamEnc, 340 final String prologEnc) throws Exception { 341 final InputStream is = getXmlStream(bomEnc, 342 prologEnc == null ? XML2 : XML3, streamEnc, prologEnc); 343 try { 344 new XmlStreamReader(is, cT, false); 345 fail("It should have failed for HTTP Content-type " + cT + ", BOM " 346 + bomEnc + ", streamEnc " + streamEnc + " and prologEnc " 347 + prologEnc); 348 } catch (final IOException ex) { 349 assertTrue(ex.getMessage().indexOf("Invalid encoding,") > -1); 350 } 351 } 352 353 protected void _testHttpLenient(final String cT, final String bomEnc, final String streamEnc, 354 final String prologEnc, final String shouldbe) throws Exception { 355 final InputStream is = getXmlStream(bomEnc, 356 prologEnc == null ? XML2 : XML3, streamEnc, prologEnc); 357 final XmlStreamReader xmlReader = new XmlStreamReader(is, cT, true); 358 assertEquals(xmlReader.getEncoding(), shouldbe); 359 xmlReader.close(); 360 } 361 362 private static final String ENCODING_ATTRIBUTE_XML = "<?xml version=\"1.0\" ?> \n" 363 + "<atom:feed xmlns:atom=\"http://www.w3.org/2005/Atom\">\n" 364 + "\n" 365 + " <atom:entry>\n" 366 + " <atom:title encoding='base64'><![CDATA\n" 367 + "aW5nTGluZSIgLz4"; 368 369 @Test 370 public void testEncodingAttributeXML() throws Exception { 371 final InputStream is = new ByteArrayInputStream(ENCODING_ATTRIBUTE_XML 372 .getBytes("UTF-8")); 373 final XmlStreamReader xmlReader = new XmlStreamReader(is, "", true); 374 assertEquals(xmlReader.getEncoding(), "UTF-8"); 375 xmlReader.close(); 376 } 377 378 // XML Stream generator 379 380 private static final int[] NO_BOM_BYTES = {}; 381 private static final int[] UTF_16BE_BOM_BYTES = { 0xFE, 0xFF }; 382 private static final int[] UTF_16LE_BOM_BYTES = { 0xFF, 0XFE }; 383 private static final int[] UTF_32BE_BOM_BYTES = { 0x00, 0x00, 0xFE, 0xFF }; 384 private static final int[] UTF_32LE_BOM_BYTES = { 0xFF, 0XFE, 0x00, 0x00 }; 385 private static final int[] UTF_8_BOM_BYTES = { 0xEF, 0xBB, 0xBF }; 386 387 private static final Map<String, int[]> BOMs = new HashMap<String, int[]>(); 388 389 static { 390 BOMs.put("no-bom", NO_BOM_BYTES); 391 BOMs.put("UTF-16BE-bom", UTF_16BE_BOM_BYTES); 392 BOMs.put("UTF-16LE-bom", UTF_16LE_BOM_BYTES); 393 BOMs.put("UTF-32BE-bom", UTF_32BE_BOM_BYTES); 394 BOMs.put("UTF-32LE-bom", UTF_32LE_BOM_BYTES); 395 BOMs.put("UTF-16-bom", NO_BOM_BYTES); // it's added by the writer 396 BOMs.put("UTF-8-bom", UTF_8_BOM_BYTES); 397 } 398 399 private static final MessageFormat XML = new MessageFormat( 400 "<root>{2}</root>"); 401 private static final MessageFormat XML_WITH_PROLOG = new MessageFormat( 402 "<?xml version=\"1.0\"?>\n<root>{2}</root>"); 403 private static final MessageFormat XML_WITH_PROLOG_AND_ENCODING_DOUBLE_QUOTES = new MessageFormat( 404 "<?xml version=\"1.0\" encoding=\"{1}\"?>\n<root>{2}</root>"); 405 private static final MessageFormat XML_WITH_PROLOG_AND_ENCODING_SINGLE_QUOTES = new MessageFormat( 406 "<?xml version=\"1.0\" encoding=''{1}''?>\n<root>{2}</root>"); 407 private static final MessageFormat XML_WITH_PROLOG_AND_ENCODING_SPACED_SINGLE_QUOTES = new MessageFormat( 408 "<?xml version=\"1.0\" encoding = \t \n \r''{1}''?>\n<root>{2}</root>"); 409 410 private static final MessageFormat INFO = new MessageFormat( 411 "\nBOM : {0}\nDoc : {1}\nStream Enc : {2}\nProlog Enc : {3}\n"); 412 413 private static final Map<String,MessageFormat> XMLs = new HashMap<String,MessageFormat>(); 414 415 static { 416 XMLs.put(XML1, XML); 417 XMLs.put(XML2, XML_WITH_PROLOG); 418 XMLs.put(XML3, XML_WITH_PROLOG_AND_ENCODING_DOUBLE_QUOTES); 419 XMLs.put(XML4, XML_WITH_PROLOG_AND_ENCODING_SINGLE_QUOTES); 420 XMLs.put(XML5, XML_WITH_PROLOG_AND_ENCODING_SPACED_SINGLE_QUOTES); 421 } 422 423 /** 424 * 425 * @param bomType no-bom, UTF-16BE-bom, UTF-16LE-bom, UTF-8-bom 426 * @param xmlType xml, xml-prolog, xml-prolog-charset 427 * @return XML stream 428 */ 429 protected InputStream getXmlStream(final String bomType, final String xmlType, 430 final String streamEnc, final String prologEnc) throws IOException { 431 final ByteArrayOutputStream baos = new ByteArrayOutputStream(1024); 432 int[] bom = BOMs.get(bomType); 433 if (bom == null) { 434 bom = new int[0]; 435 } 436 for (final int element : bom) { 437 baos.write(element); 438 } 439 final Writer writer = new OutputStreamWriter(baos, streamEnc); 440 final String xmlDoc = getXML(bomType, xmlType, streamEnc, prologEnc); 441 writer.write(xmlDoc); 442 443 // PADDDING TO TEST THINGS WORK BEYOND PUSHBACK_SIZE 444 writer.write("<da>\n"); 445 for (int i = 0; i < 10000; i++) { 446 writer.write("<do/>\n"); 447 } 448 writer.write("</da>\n"); 449 450 writer.close(); 451 return new ByteArrayInputStream(baos.toByteArray()); 452 } 453 454 /** 455 * Create the XML. 456 */ 457 private String getXML(final String bomType, final String xmlType, 458 final String streamEnc, final String prologEnc) { 459 final MessageFormat xml = XMLs.get(xmlType); 460 final String info = INFO.format(new Object[] { bomType, xmlType, prologEnc }); 461 final String xmlDoc = xml.format(new Object[] { streamEnc, prologEnc, info }); 462 return xmlDoc; 463 } 464}