/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17  package org.apache.commons.io.input;
18  
19  import static org.junit.Assert.assertEquals;
20  import static org.junit.Assert.assertTrue;
21  import static org.junit.Assert.fail;
22  
23  import java.io.ByteArrayInputStream;
24  import java.io.ByteArrayOutputStream;
25  import java.io.IOException;
26  import java.io.InputStream;
27  import java.io.OutputStreamWriter;
28  import java.io.Writer;
29  import java.text.MessageFormat;
30  import java.util.HashMap;
31  import java.util.Map;
32  
33  import org.apache.commons.io.IOUtils;
34  import org.junit.Test;
35  
36  /**
37   */
38  public class XmlStreamReaderTest {
39      private static final String XML5 = "xml-prolog-encoding-spaced-single-quotes";
40      private static final String XML4 = "xml-prolog-encoding-single-quotes";
41      private static final String XML3 = "xml-prolog-encoding-double-quotes";
42      private static final String XML2 = "xml-prolog";
43      private static final String XML1 = "xml";
44  
45      protected void _testRawNoBomValid(final String encoding) throws Exception {
46          InputStream is = getXmlStream("no-bom", XML1, encoding, encoding);
47          XmlStreamReader xmlReader = new XmlStreamReader(is, false);
48          assertEquals(xmlReader.getEncoding(), "UTF-8");
49          xmlReader.close();
50  
51          is = getXmlStream("no-bom", XML2, encoding, encoding);
52          xmlReader = new XmlStreamReader(is);
53          assertEquals(xmlReader.getEncoding(), "UTF-8");
54          xmlReader.close();
55  
56          is = getXmlStream("no-bom", XML3, encoding, encoding);
57          xmlReader = new XmlStreamReader(is);
58          assertEquals(xmlReader.getEncoding(), encoding);
59          xmlReader.close();
60  
61          is = getXmlStream("no-bom", XML4, encoding, encoding);
62          xmlReader = new XmlStreamReader(is);
63          assertEquals(xmlReader.getEncoding(), encoding);
64          xmlReader.close();
65  
66          is = getXmlStream("no-bom", XML5, encoding, encoding);
67          xmlReader = new XmlStreamReader(is);
68          assertEquals(xmlReader.getEncoding(), encoding);
69          xmlReader.close();
70      }
71  
72      protected void _testRawNoBomInvalid(final String encoding) throws Exception {
73          final InputStream is = getXmlStream("no-bom", XML3, encoding, encoding);
74          try {
75              new XmlStreamReader(is, false);
76              fail("It should have failed");
77          } catch (final IOException ex) {
78              assertTrue(ex.getMessage().indexOf("Invalid encoding,") > -1);
79          }
80      }
81  
82      @Test
83      public void testRawNoBomUsAscii() throws Exception {
84          _testRawNoBomValid("US-ASCII");
85      }
86  
87      @Test
88      public void testRawNoBomUtf8() throws Exception {
89          _testRawNoBomValid("UTF-8");
90      }
91  
92      @Test
93      public void testRawNoBomUtf16BE() throws Exception {
94          _testRawNoBomValid("UTF-16BE");
95      }
96  
97      @Test
98      public void testRawNoBomUtf16LE() throws Exception {
99          _testRawNoBomValid("UTF-16LE");
100     }
101 
102     @Test
103     public void testRawNoBomUtf32BE() throws Exception {
104         _testRawNoBomValid("UTF-32BE");
105     }
106 
107     @Test
108     public void testRawNoBomUtf32LE() throws Exception {
109         _testRawNoBomValid("UTF-32LE");
110     }
111 
112     @Test
113     public void testRawNoBomIso8859_1() throws Exception {
114         _testRawNoBomValid("ISO-8859-1");
115     }
116 
117     @Test
118     public void testRawNoBomCp1047() throws Exception {
119         _testRawNoBomValid("CP1047");
120     }
121 
122     protected void _testRawBomValid(final String encoding) throws Exception {
123         final InputStream is = getXmlStream(encoding + "-bom", XML3, encoding,
124                 encoding);
125         final XmlStreamReader xmlReader = new XmlStreamReader(is, false);
126         if (!encoding.equals("UTF-16") && !encoding.equals("UTF-32")) {
127             assertEquals(xmlReader.getEncoding(), encoding);
128         } else {
129             assertEquals(xmlReader.getEncoding()
130                     .substring(0, encoding.length()), encoding);
131         }
132         xmlReader.close();
133     }
134 
135     protected void _testRawBomInvalid(final String bomEnc, final String streamEnc,
136             final String prologEnc) throws Exception {
137         final InputStream is = getXmlStream(bomEnc, XML3, streamEnc, prologEnc);
138         XmlStreamReader xmlReader = null;
139         try {
140             xmlReader = new XmlStreamReader(is, false);
141             final String foundEnc = xmlReader.getEncoding();
142             fail("Expected IOException for BOM " + bomEnc + ", streamEnc "
143                     + streamEnc + " and prologEnc " + prologEnc + ": found "
144                     + foundEnc);
145         } catch (final IOException ex) {
146             assertTrue(ex.getMessage().indexOf("Invalid encoding,") > -1);
147         }
148         if (xmlReader != null) {
149             xmlReader.close();
150         }
151     }
152 
153     @Test
154     public void testRawBomUtf8() throws Exception {
155         _testRawBomValid("UTF-8");
156         _testRawBomInvalid("UTF-8-bom", "US-ASCII", "US-ASCII");
157         _testRawBomInvalid("UTF-8-bom", "ISO-8859-1", "ISO-8859-1");
158         _testRawBomInvalid("UTF-8-bom", "UTF-8", "UTF-16");
159         _testRawBomInvalid("UTF-8-bom", "UTF-8", "UTF-16BE");
160         _testRawBomInvalid("UTF-8-bom", "UTF-8", "UTF-16LE");
161         _testRawBomInvalid("UTF-16BE-bom", "UTF-16BE", "UTF-16LE");
162         _testRawBomInvalid("UTF-16LE-bom", "UTF-16LE", "UTF-16BE");
163         _testRawBomInvalid("UTF-16LE-bom", "UTF-16LE", "UTF-8");
164         _testRawBomInvalid("UTF-32BE-bom", "UTF-32BE", "UTF-32LE");
165         _testRawBomInvalid("UTF-32LE-bom", "UTF-32LE", "UTF-32BE");
166         _testRawBomInvalid("UTF-32LE-bom", "UTF-32LE", "UTF-8");
167     }
168 
169     @Test
170     public void testRawBomUtf16() throws Exception {
171         _testRawBomValid("UTF-16BE");
172         _testRawBomValid("UTF-16LE");
173         _testRawBomValid("UTF-16");
174 
175         _testRawBomInvalid("UTF-16BE-bom", "UTF-16BE", "UTF-16LE");
176         _testRawBomInvalid("UTF-16LE-bom", "UTF-16LE", "UTF-16BE");
177         _testRawBomInvalid("UTF-16LE-bom", "UTF-16LE", "UTF-8");
178     }
179 
180     @Test
181     public void testRawBomUtf32() throws Exception {
182         _testRawBomValid("UTF-32BE");
183         _testRawBomValid("UTF-32LE");
184         _testRawBomValid("UTF-32");
185 
186         _testRawBomInvalid("UTF-32BE-bom", "UTF-32BE", "UTF-32LE");
187         _testRawBomInvalid("UTF-32LE-bom", "UTF-32LE", "UTF-32BE");
188         _testRawBomInvalid("UTF-32LE-bom", "UTF-32LE", "UTF-8");
189 }
190 
191 
192     @Test
193     public void testHttp() throws Exception {
194         // niallp 2010-10-06 - remove following 2 tests - I reinstated
195         // checks for non-UTF-16 encodings (18 tests) and these failed
196         // _testHttpValid("application/xml", "no-bom", "US-ASCII", null);
197         // _testHttpValid("application/xml", "UTF-8-bom", "US-ASCII", null);
198         _testHttpValid("application/xml", "UTF-8-bom", "UTF-8", null);
199         _testHttpValid("application/xml", "UTF-8-bom", "UTF-8", "UTF-8");
200         _testHttpValid("application/xml;charset=UTF-8", "UTF-8-bom", "UTF-8", null);
201         _testHttpValid("application/xml;charset=\"UTF-8\"", "UTF-8-bom", "UTF-8", null);
202         _testHttpValid("application/xml;charset='UTF-8'", "UTF-8-bom", "UTF-8", null);
203         _testHttpValid("application/xml;charset=UTF-8", "UTF-8-bom", "UTF-8", "UTF-8");
204         _testHttpValid("application/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", null);
205         _testHttpValid("application/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", "UTF-16");
206         _testHttpValid("application/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", "UTF-16BE");
207 
208         _testHttpInvalid("application/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", null);
209         _testHttpInvalid("application/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", "UTF-16");
210         _testHttpInvalid("application/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", "UTF-16BE");
211 
212         _testHttpInvalid("application/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", null);
213         _testHttpInvalid("application/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", "UTF-32");
214         _testHttpInvalid("application/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", "UTF-32BE");
215 
216         _testHttpInvalid("application/xml", "UTF-8-bom", "US-ASCII", "US-ASCII");
217         _testHttpInvalid("application/xml;charset=UTF-16", "UTF-16LE", "UTF-8", "UTF-8");
218         _testHttpInvalid("application/xml;charset=UTF-16", "no-bom", "UTF-16BE", "UTF-16BE");
219         _testHttpInvalid("application/xml;charset=UTF-32", "UTF-32LE", "UTF-8", "UTF-8");
220         _testHttpInvalid("application/xml;charset=UTF-32", "no-bom", "UTF-32BE", "UTF-32BE");
221 
222         _testHttpValid("text/xml", "no-bom", "US-ASCII", null);
223         _testHttpValid("text/xml;charset=UTF-8", "UTF-8-bom", "UTF-8", "UTF-8");
224         _testHttpValid("text/xml;charset=UTF-8", "UTF-8-bom", "UTF-8", null);
225         _testHttpValid("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", null);
226         _testHttpValid("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", "UTF-16");
227         _testHttpValid("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", "UTF-16BE");
228         _testHttpValid("text/xml;charset=UTF-32", "UTF-32BE-bom", "UTF-32BE", null);
229         _testHttpValid("text/xml;charset=UTF-32", "UTF-32BE-bom", "UTF-32BE", "UTF-32");
230         _testHttpValid("text/xml;charset=UTF-32", "UTF-32BE-bom", "UTF-32BE", "UTF-32BE");
231         _testHttpValid("text/xml", "UTF-8-bom", "US-ASCII", null);
232 
233         _testAlternateDefaultEncoding("application/xml", "UTF-8-bom", "UTF-8", null, null);
234         _testAlternateDefaultEncoding("application/xml", "no-bom", "US-ASCII", null, "US-ASCII");
235         _testAlternateDefaultEncoding("application/xml", "UTF-8-bom", "UTF-8", null, "UTF-8");
236         _testAlternateDefaultEncoding("text/xml", "no-bom", "US-ASCII", null, null);
237         _testAlternateDefaultEncoding("text/xml", "no-bom", "US-ASCII", null, "US-ASCII");
238         _testAlternateDefaultEncoding("text/xml", "no-bom", "US-ASCII", null, "UTF-8");
239 
240         _testHttpInvalid("text/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", null);
241         _testHttpInvalid("text/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", "UTF-16");
242         _testHttpInvalid("text/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", "UTF-16BE");
243         _testHttpInvalid("text/xml;charset=UTF-16", "no-bom", "UTF-16BE", "UTF-16BE");
244         _testHttpInvalid("text/xml;charset=UTF-16", "no-bom", "UTF-16BE", null);
245 
246         _testHttpInvalid("text/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", null);
247         _testHttpInvalid("text/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", "UTF-32");
248         _testHttpInvalid("text/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", "UTF-32BE");
249         _testHttpInvalid("text/xml;charset=UTF-32", "no-bom", "UTF-32BE", "UTF-32BE");
250         _testHttpInvalid("text/xml;charset=UTF-32", "no-bom", "UTF-32BE", null);
251 
252         _testHttpLenient("text/xml", "no-bom", "US-ASCII", null, "US-ASCII");
253         _testHttpLenient("text/xml;charset=UTF-8", "UTF-8-bom", "UTF-8", "UTF-8", "UTF-8");
254         _testHttpLenient("text/xml;charset=UTF-8", "UTF-8-bom", "UTF-8", null, "UTF-8");
255         _testHttpLenient("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", null, "UTF-16BE");
256         _testHttpLenient("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", "UTF-16", "UTF-16");
257         _testHttpLenient("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", "UTF-16BE", "UTF-16BE");
258         _testHttpLenient("text/xml;charset=UTF-32", "UTF-32BE-bom", "UTF-32BE", null, "UTF-32BE");
259         _testHttpLenient("text/xml;charset=UTF-32", "UTF-32BE-bom", "UTF-32BE", "UTF-32", "UTF-32");
260         _testHttpLenient("text/xml;charset=UTF-32", "UTF-32BE-bom", "UTF-32BE", "UTF-32BE", "UTF-32BE");
261         _testHttpLenient("text/xml", "UTF-8-bom", "US-ASCII", null, "US-ASCII");
262 
263         _testHttpLenient("text/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", null, "UTF-16BE");
264         _testHttpLenient("text/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", "UTF-16", "UTF-16");
265         _testHttpLenient("text/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", "UTF-16BE", "UTF-16BE");
266         _testHttpLenient("text/xml;charset=UTF-16", "no-bom", "UTF-16BE", "UTF-16BE", "UTF-16BE");
267         _testHttpLenient("text/xml;charset=UTF-16", "no-bom", "UTF-16BE", null, "UTF-16");
268 
269         _testHttpLenient("text/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", null, "UTF-32BE");
270         _testHttpLenient("text/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", "UTF-32", "UTF-32");
271         _testHttpLenient("text/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", "UTF-32BE", "UTF-32BE");
272         _testHttpLenient("text/xml;charset=UTF-32", "no-bom", "UTF-32BE", "UTF-32BE", "UTF-32BE");
273         _testHttpLenient("text/xml;charset=UTF-32", "no-bom", "UTF-32BE", null, "UTF-32");
274 
275         _testHttpLenient("text/html", "no-bom", "US-ASCII", "US-ASCII", "US-ASCII");
276         _testHttpLenient("text/html", "no-bom", "US-ASCII", null, "US-ASCII");
277         _testHttpLenient("text/html;charset=UTF-8", "no-bom", "US-ASCII", "UTF-8", "UTF-8");
278         _testHttpLenient("text/html;charset=UTF-16BE", "no-bom", "US-ASCII", "UTF-8", "UTF-8");
279         _testHttpLenient("text/html;charset=UTF-32BE", "no-bom", "US-ASCII", "UTF-8", "UTF-8");
280     }
281 
282     @Test
283     public void testRawContent() throws Exception {
284         final String encoding = "UTF-8";
285         final String xml = getXML("no-bom", XML3, encoding, encoding);
286         final ByteArrayInputStream is = new ByteArrayInputStream(xml.getBytes(encoding));
287         final XmlStreamReader xmlReader = new XmlStreamReader(is);
288         assertEquals("Check encoding", xmlReader.getEncoding(), encoding);
289         assertEquals("Check content", xml, IOUtils.toString(xmlReader));
290     }
291 
292     @Test
293     public void testHttpContent() throws Exception {
294         final String encoding = "UTF-8";
295         final String xml = getXML("no-bom", XML3, encoding, encoding);
296         final ByteArrayInputStream is = new ByteArrayInputStream(xml.getBytes(encoding));
297         final XmlStreamReader xmlReader = new XmlStreamReader(is, encoding);
298         assertEquals("Check encoding", xmlReader.getEncoding(), encoding);
299         assertEquals("Check content", xml, IOUtils.toString(xmlReader));
300     }
301 
302     public void _testAlternateDefaultEncoding(final String cT, final String bomEnc,
303             final String streamEnc, final String prologEnc, final String alternateEnc)
304             throws Exception {
305         final InputStream is = getXmlStream(bomEnc, prologEnc == null ? XML1
306                 : XML3, streamEnc, prologEnc);
307         final XmlStreamReader xmlReader = new XmlStreamReader(is, cT, false, alternateEnc);
308         if (!streamEnc.equals("UTF-16")) {
309             // we can not assert things here because UTF-8, US-ASCII and
310             // ISO-8859-1 look alike for the chars used for detection
311             // (niallp 2010-10-06 - I re-instated the check below - the tests(6) passed)
312             final String enc = alternateEnc != null ? alternateEnc : streamEnc;
313             assertEquals(xmlReader.getEncoding(), enc);
314         } else {
315             //String enc = (alternateEnc != null) ? alternateEnc : streamEnc;
316             assertEquals(xmlReader.getEncoding().substring(0,
317                     streamEnc.length()), streamEnc);
318         }
319         xmlReader.close();
320     }
321 
322     public void _testHttpValid(final String cT, final String bomEnc, final String streamEnc,
323             final String prologEnc) throws Exception {
324         final InputStream is = getXmlStream(bomEnc,
325                 prologEnc == null ? XML1 : XML3, streamEnc, prologEnc);
326         final XmlStreamReader xmlReader = new XmlStreamReader(is, cT, false);
327         if (!streamEnc.equals("UTF-16")) {
328             // we can not assert things here because UTF-8, US-ASCII and
329             // ISO-8859-1 look alike for the chars used for detection
330             // (niallp 2010-10-06 - I re-instated the check below and removed the 2 tests that failed)
331             assertEquals(xmlReader.getEncoding(), streamEnc);
332         } else {
333             assertEquals(xmlReader.getEncoding().substring(0,
334                     streamEnc.length()), streamEnc);
335         }
336         xmlReader.close();
337     }
338 
339     protected void _testHttpInvalid(final String cT, final String bomEnc, final String streamEnc,
340             final String prologEnc) throws Exception {
341         final InputStream is = getXmlStream(bomEnc,
342                 prologEnc == null ? XML2 : XML3, streamEnc, prologEnc);
343         try {
344             new XmlStreamReader(is, cT, false);
345             fail("It should have failed for HTTP Content-type " + cT + ", BOM "
346                     + bomEnc + ", streamEnc " + streamEnc + " and prologEnc "
347                     + prologEnc);
348         } catch (final IOException ex) {
349             assertTrue(ex.getMessage().indexOf("Invalid encoding,") > -1);
350         }
351     }
352 
353     protected void _testHttpLenient(final String cT, final String bomEnc, final String streamEnc,
354             final String prologEnc, final String shouldbe) throws Exception {
355         final InputStream is = getXmlStream(bomEnc,
356                 prologEnc == null ? XML2 : XML3, streamEnc, prologEnc);
357         final XmlStreamReader xmlReader = new XmlStreamReader(is, cT, true);
358         assertEquals(xmlReader.getEncoding(), shouldbe);
359         xmlReader.close();
360     }
361 
362     private static final String ENCODING_ATTRIBUTE_XML = "<?xml version=\"1.0\" ?> \n"
363             + "<atom:feed xmlns:atom=\"http://www.w3.org/2005/Atom\">\n"
364             + "\n"
365             + "  <atom:entry>\n"
366             + "    <atom:title encoding='base64'><![CDATA\n"
367             + "aW5nTGluZSIgLz4";
368 
369     @Test
370     public void testEncodingAttributeXML() throws Exception {
371         final InputStream is = new ByteArrayInputStream(ENCODING_ATTRIBUTE_XML
372                 .getBytes("UTF-8"));
373         final XmlStreamReader xmlReader = new XmlStreamReader(is, "", true);
374         assertEquals(xmlReader.getEncoding(), "UTF-8");
375         xmlReader.close();
376     }
377 
378     // XML Stream generator
379 
380     private static final int[] NO_BOM_BYTES = {};
381     private static final int[] UTF_16BE_BOM_BYTES = { 0xFE, 0xFF };
382     private static final int[] UTF_16LE_BOM_BYTES = { 0xFF, 0XFE };
383     private static final int[] UTF_32BE_BOM_BYTES = { 0x00, 0x00, 0xFE, 0xFF };
384     private static final int[] UTF_32LE_BOM_BYTES = { 0xFF, 0XFE, 0x00, 0x00 };
385     private static final int[] UTF_8_BOM_BYTES = { 0xEF, 0xBB, 0xBF };
386 
387     private static final Map<String, int[]> BOMs = new HashMap<String, int[]>();
388 
389     static {
390         BOMs.put("no-bom", NO_BOM_BYTES);
391         BOMs.put("UTF-16BE-bom", UTF_16BE_BOM_BYTES);
392         BOMs.put("UTF-16LE-bom", UTF_16LE_BOM_BYTES);
393         BOMs.put("UTF-32BE-bom", UTF_32BE_BOM_BYTES);
394         BOMs.put("UTF-32LE-bom", UTF_32LE_BOM_BYTES);
395         BOMs.put("UTF-16-bom", NO_BOM_BYTES); // it's added by the writer
396         BOMs.put("UTF-8-bom", UTF_8_BOM_BYTES);
397     }
398 
399     private static final MessageFormat XML = new MessageFormat(
400             "<root>{2}</root>");
401     private static final MessageFormat XML_WITH_PROLOG = new MessageFormat(
402             "<?xml version=\"1.0\"?>\n<root>{2}</root>");
403     private static final MessageFormat XML_WITH_PROLOG_AND_ENCODING_DOUBLE_QUOTES = new MessageFormat(
404             "<?xml version=\"1.0\" encoding=\"{1}\"?>\n<root>{2}</root>");
405     private static final MessageFormat XML_WITH_PROLOG_AND_ENCODING_SINGLE_QUOTES = new MessageFormat(
406             "<?xml version=\"1.0\" encoding=''{1}''?>\n<root>{2}</root>");
407     private static final MessageFormat XML_WITH_PROLOG_AND_ENCODING_SPACED_SINGLE_QUOTES = new MessageFormat(
408             "<?xml version=\"1.0\" encoding =  \t \n \r''{1}''?>\n<root>{2}</root>");
409 
410     private static final MessageFormat INFO = new MessageFormat(
411             "\nBOM : {0}\nDoc : {1}\nStream Enc : {2}\nProlog Enc : {3}\n");
412 
413     private static final Map<String,MessageFormat> XMLs = new HashMap<String,MessageFormat>();
414 
415     static {
416         XMLs.put(XML1, XML);
417         XMLs.put(XML2, XML_WITH_PROLOG);
418         XMLs.put(XML3, XML_WITH_PROLOG_AND_ENCODING_DOUBLE_QUOTES);
419         XMLs.put(XML4, XML_WITH_PROLOG_AND_ENCODING_SINGLE_QUOTES);
420         XMLs.put(XML5, XML_WITH_PROLOG_AND_ENCODING_SPACED_SINGLE_QUOTES);
421     }
422 
423     /**
424      *
425      * @param bomType no-bom, UTF-16BE-bom, UTF-16LE-bom, UTF-8-bom
426      * @param xmlType xml, xml-prolog, xml-prolog-charset
427      * @return XML stream
428      */
429     protected InputStream getXmlStream(final String bomType, final String xmlType,
430             final String streamEnc, final String prologEnc) throws IOException {
431         final ByteArrayOutputStream baos = new ByteArrayOutputStream(1024);
432         int[] bom = BOMs.get(bomType);
433         if (bom == null) {
434             bom = new int[0];
435         }
436         for (final int element : bom) {
437             baos.write(element);
438         }
439         final Writer writer = new OutputStreamWriter(baos, streamEnc);
440         final String xmlDoc = getXML(bomType, xmlType, streamEnc, prologEnc);
441         writer.write(xmlDoc);
442 
443         // PADDDING TO TEST THINGS WORK BEYOND PUSHBACK_SIZE
444         writer.write("<da>\n");
445         for (int i = 0; i < 10000; i++) {
446             writer.write("<do/>\n");
447         }
448         writer.write("</da>\n");
449 
450         writer.close();
451         return new ByteArrayInputStream(baos.toByteArray());
452     }
453 
454     /**
455      * Create the XML.
456      */
457     private String getXML(final String bomType, final String xmlType,
458             final String streamEnc, final String prologEnc) {
459         final MessageFormat xml = XMLs.get(xmlType);
460         final String info = INFO.format(new Object[] { bomType, xmlType, prologEnc });
461         final String xmlDoc = xml.format(new Object[] { streamEnc, prologEnc, info });
462         return xmlDoc;
463     }
464 }