View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.io.input;
18  
19  import static org.junit.Assert.*;
20  
21  import java.io.ByteArrayInputStream;
22  import java.io.ByteArrayOutputStream;
23  import java.io.IOException;
24  import java.io.InputStream;
25  import java.io.OutputStreamWriter;
26  import java.io.Writer;
27  import java.text.MessageFormat;
28  import java.util.HashMap;
29  import java.util.Map;
30  
31  import org.apache.commons.io.IOUtils;
32  import org.junit.Test;
33  
34  public class XmlStreamReaderTest {
35      private static final String XML5 = "xml-prolog-encoding-spaced-single-quotes";
36      private static final String XML4 = "xml-prolog-encoding-single-quotes";
37      private static final String XML3 = "xml-prolog-encoding-double-quotes";
38      private static final String XML2 = "xml-prolog";
39      private static final String XML1 = "xml";
40  
41      protected void _testRawNoBomValid(final String encoding) throws Exception {
42          InputStream is = getXmlStream("no-bom", XML1, encoding, encoding);
43          XmlStreamReader xmlReader = new XmlStreamReader(is, false);
44          assertEquals(xmlReader.getEncoding(), "UTF-8");
45          xmlReader.close();
46  
47          is = getXmlStream("no-bom", XML2, encoding, encoding);
48          xmlReader = new XmlStreamReader(is);
49          assertEquals(xmlReader.getEncoding(), "UTF-8");
50          xmlReader.close();
51  
52          is = getXmlStream("no-bom", XML3, encoding, encoding);
53          xmlReader = new XmlStreamReader(is);
54          assertEquals(xmlReader.getEncoding(), encoding);
55          xmlReader.close();
56  
57          is = getXmlStream("no-bom", XML4, encoding, encoding);
58          xmlReader = new XmlStreamReader(is);
59          assertEquals(xmlReader.getEncoding(), encoding);
60          xmlReader.close();
61  
62          is = getXmlStream("no-bom", XML5, encoding, encoding);
63          xmlReader = new XmlStreamReader(is);
64          assertEquals(xmlReader.getEncoding(), encoding);
65          xmlReader.close();
66      }
67  
68      protected void _testRawNoBomInvalid(final String encoding) throws Exception {
69          final InputStream is = getXmlStream("no-bom", XML3, encoding, encoding);
70          try {
71              (new XmlStreamReader(is, false)).close();;
72              fail("It should have failed");
73          } catch (final IOException ex) {
74              assertTrue(ex.getMessage().contains("Invalid encoding,"));
75          }
76      }
77  
78      @Test
79      public void testRawNoBomUsAscii() throws Exception {
80          _testRawNoBomValid("US-ASCII");
81      }
82  
83      @Test
84      public void testRawNoBomUtf8() throws Exception {
85          _testRawNoBomValid("UTF-8");
86      }
87  
88      @Test
89      public void testRawNoBomUtf16BE() throws Exception {
90          _testRawNoBomValid("UTF-16BE");
91      }
92  
93      @Test
94      public void testRawNoBomUtf16LE() throws Exception {
95          _testRawNoBomValid("UTF-16LE");
96      }
97  
98      @Test
99      public void testRawNoBomUtf32BE() throws Exception {
100         _testRawNoBomValid("UTF-32BE");
101     }
102 
103     @Test
104     public void testRawNoBomUtf32LE() throws Exception {
105         _testRawNoBomValid("UTF-32LE");
106     }
107 
108     @Test
109     public void testRawNoBomIso8859_1() throws Exception {
110         _testRawNoBomValid("ISO-8859-1");
111     }
112 
113     @Test
114     public void testRawNoBomCp1047() throws Exception {
115         _testRawNoBomValid("CP1047");
116     }
117 
118     protected void _testRawBomValid(final String encoding) throws Exception {
119         final InputStream is = getXmlStream(encoding + "-bom", XML3, encoding,
120                 encoding);
121         final XmlStreamReader xmlReader = new XmlStreamReader(is, false);
122         if (!encoding.equals("UTF-16") && !encoding.equals("UTF-32")) {
123             assertEquals(xmlReader.getEncoding(), encoding);
124         } else {
125             assertEquals(xmlReader.getEncoding()
126                     .substring(0, encoding.length()), encoding);
127         }
128         xmlReader.close();
129     }
130 
131     protected void _testRawBomInvalid(final String bomEnc, final String streamEnc,
132                                       final String prologEnc) throws Exception {
133         final InputStream is = getXmlStream(bomEnc, XML3, streamEnc, prologEnc);
134         XmlStreamReader xmlReader = null;
135         try {
136             xmlReader = new XmlStreamReader(is, false);
137             final String foundEnc = xmlReader.getEncoding();
138             fail("Expected IOException for BOM " + bomEnc + ", streamEnc "
139                     + streamEnc + " and prologEnc " + prologEnc + ": found "
140                     + foundEnc);
141         } catch (final IOException ex) {
142             assertTrue(ex.getMessage().contains("Invalid encoding,"));
143         }
144         if (xmlReader != null) {
145             xmlReader.close();
146         }
147     }
148 
149     @Test
150     public void testRawBomUtf8() throws Exception {
151         _testRawBomValid("UTF-8");
152         _testRawBomInvalid("UTF-8-bom", "US-ASCII", "US-ASCII");
153         _testRawBomInvalid("UTF-8-bom", "ISO-8859-1", "ISO-8859-1");
154         _testRawBomInvalid("UTF-8-bom", "UTF-8", "UTF-16");
155         _testRawBomInvalid("UTF-8-bom", "UTF-8", "UTF-16BE");
156         _testRawBomInvalid("UTF-8-bom", "UTF-8", "UTF-16LE");
157         _testRawBomInvalid("UTF-16BE-bom", "UTF-16BE", "UTF-16LE");
158         _testRawBomInvalid("UTF-16LE-bom", "UTF-16LE", "UTF-16BE");
159         _testRawBomInvalid("UTF-16LE-bom", "UTF-16LE", "UTF-8");
160         _testRawBomInvalid("UTF-32BE-bom", "UTF-32BE", "UTF-32LE");
161         _testRawBomInvalid("UTF-32LE-bom", "UTF-32LE", "UTF-32BE");
162         _testRawBomInvalid("UTF-32LE-bom", "UTF-32LE", "UTF-8");
163     }
164 
165     @Test
166     public void testRawBomUtf16() throws Exception {
167         _testRawBomValid("UTF-16BE");
168         _testRawBomValid("UTF-16LE");
169         _testRawBomValid("UTF-16");
170 
171         _testRawBomInvalid("UTF-16BE-bom", "UTF-16BE", "UTF-16LE");
172         _testRawBomInvalid("UTF-16LE-bom", "UTF-16LE", "UTF-16BE");
173         _testRawBomInvalid("UTF-16LE-bom", "UTF-16LE", "UTF-8");
174     }
175 
176     @Test
177     public void testRawBomUtf32() throws Exception {
178         _testRawBomValid("UTF-32BE");
179         _testRawBomValid("UTF-32LE");
180         _testRawBomValid("UTF-32");
181 
182         _testRawBomInvalid("UTF-32BE-bom", "UTF-32BE", "UTF-32LE");
183         _testRawBomInvalid("UTF-32LE-bom", "UTF-32LE", "UTF-32BE");
184         _testRawBomInvalid("UTF-32LE-bom", "UTF-32LE", "UTF-8");
185     }
186 
187 
188     @Test
189     public void testHttp() throws Exception {
190         // niallp 2010-10-06 - remove following 2 tests - I reinstated
191         // checks for non-UTF-16 encodings (18 tests) and these failed
192         // _testHttpValid("application/xml", "no-bom", "US-ASCII", null);
193         // _testHttpValid("application/xml", "UTF-8-bom", "US-ASCII", null);
194         _testHttpValid("application/xml", "UTF-8-bom", "UTF-8", null);
195         _testHttpValid("application/xml", "UTF-8-bom", "UTF-8", "UTF-8");
196         _testHttpValid("application/xml;charset=UTF-8", "UTF-8-bom", "UTF-8", null);
197         _testHttpValid("application/xml;charset=\"UTF-8\"", "UTF-8-bom", "UTF-8", null);
198         _testHttpValid("application/xml;charset='UTF-8'", "UTF-8-bom", "UTF-8", null);
199         _testHttpValid("application/xml;charset=UTF-8", "UTF-8-bom", "UTF-8", "UTF-8");
200         _testHttpValid("application/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", null);
201         _testHttpValid("application/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", "UTF-16");
202         _testHttpValid("application/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", "UTF-16BE");
203 
204         _testHttpInvalid("application/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", null);
205         _testHttpInvalid("application/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", "UTF-16");
206         _testHttpInvalid("application/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", "UTF-16BE");
207 
208         _testHttpInvalid("application/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", null);
209         _testHttpInvalid("application/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", "UTF-32");
210         _testHttpInvalid("application/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", "UTF-32BE");
211 
212         _testHttpInvalid("application/xml", "UTF-8-bom", "US-ASCII", "US-ASCII");
213         _testHttpInvalid("application/xml;charset=UTF-16", "UTF-16LE", "UTF-8", "UTF-8");
214         _testHttpInvalid("application/xml;charset=UTF-16", "no-bom", "UTF-16BE", "UTF-16BE");
215         _testHttpInvalid("application/xml;charset=UTF-32", "UTF-32LE", "UTF-8", "UTF-8");
216         _testHttpInvalid("application/xml;charset=UTF-32", "no-bom", "UTF-32BE", "UTF-32BE");
217 
218         _testHttpValid("text/xml", "no-bom", "US-ASCII", null);
219         _testHttpValid("text/xml;charset=UTF-8", "UTF-8-bom", "UTF-8", "UTF-8");
220         _testHttpValid("text/xml;charset=UTF-8", "UTF-8-bom", "UTF-8", null);
221         _testHttpValid("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", null);
222         _testHttpValid("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", "UTF-16");
223         _testHttpValid("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", "UTF-16BE");
224         _testHttpValid("text/xml;charset=UTF-32", "UTF-32BE-bom", "UTF-32BE", null);
225         _testHttpValid("text/xml;charset=UTF-32", "UTF-32BE-bom", "UTF-32BE", "UTF-32");
226         _testHttpValid("text/xml;charset=UTF-32", "UTF-32BE-bom", "UTF-32BE", "UTF-32BE");
227         _testHttpValid("text/xml", "UTF-8-bom", "US-ASCII", null);
228 
229         _testAlternateDefaultEncoding("application/xml", "UTF-8-bom", "UTF-8", null, null);
230         _testAlternateDefaultEncoding("application/xml", "no-bom", "US-ASCII", null, "US-ASCII");
231         _testAlternateDefaultEncoding("application/xml", "UTF-8-bom", "UTF-8", null, "UTF-8");
232         _testAlternateDefaultEncoding("text/xml", "no-bom", "US-ASCII", null, null);
233         _testAlternateDefaultEncoding("text/xml", "no-bom", "US-ASCII", null, "US-ASCII");
234         _testAlternateDefaultEncoding("text/xml", "no-bom", "US-ASCII", null, "UTF-8");
235 
236         _testHttpInvalid("text/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", null);
237         _testHttpInvalid("text/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", "UTF-16");
238         _testHttpInvalid("text/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", "UTF-16BE");
239         _testHttpInvalid("text/xml;charset=UTF-16", "no-bom", "UTF-16BE", "UTF-16BE");
240         _testHttpInvalid("text/xml;charset=UTF-16", "no-bom", "UTF-16BE", null);
241 
242         _testHttpInvalid("text/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", null);
243         _testHttpInvalid("text/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", "UTF-32");
244         _testHttpInvalid("text/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", "UTF-32BE");
245         _testHttpInvalid("text/xml;charset=UTF-32", "no-bom", "UTF-32BE", "UTF-32BE");
246         _testHttpInvalid("text/xml;charset=UTF-32", "no-bom", "UTF-32BE", null);
247 
248         _testHttpLenient("text/xml", "no-bom", "US-ASCII", null, "US-ASCII");
249         _testHttpLenient("text/xml;charset=UTF-8", "UTF-8-bom", "UTF-8", "UTF-8", "UTF-8");
250         _testHttpLenient("text/xml;charset=UTF-8", "UTF-8-bom", "UTF-8", null, "UTF-8");
251         _testHttpLenient("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", null, "UTF-16BE");
252         _testHttpLenient("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", "UTF-16", "UTF-16");
253         _testHttpLenient("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", "UTF-16BE", "UTF-16BE");
254         _testHttpLenient("text/xml;charset=UTF-32", "UTF-32BE-bom", "UTF-32BE", null, "UTF-32BE");
255         _testHttpLenient("text/xml;charset=UTF-32", "UTF-32BE-bom", "UTF-32BE", "UTF-32", "UTF-32");
256         _testHttpLenient("text/xml;charset=UTF-32", "UTF-32BE-bom", "UTF-32BE", "UTF-32BE", "UTF-32BE");
257         _testHttpLenient("text/xml", "UTF-8-bom", "US-ASCII", null, "US-ASCII");
258 
259         _testHttpLenient("text/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", null, "UTF-16BE");
260         _testHttpLenient("text/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", "UTF-16", "UTF-16");
261         _testHttpLenient("text/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", "UTF-16BE", "UTF-16BE");
262         _testHttpLenient("text/xml;charset=UTF-16", "no-bom", "UTF-16BE", "UTF-16BE", "UTF-16BE");
263         _testHttpLenient("text/xml;charset=UTF-16", "no-bom", "UTF-16BE", null, "UTF-16");
264 
265         _testHttpLenient("text/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", null, "UTF-32BE");
266         _testHttpLenient("text/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", "UTF-32", "UTF-32");
267         _testHttpLenient("text/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", "UTF-32BE", "UTF-32BE");
268         _testHttpLenient("text/xml;charset=UTF-32", "no-bom", "UTF-32BE", "UTF-32BE", "UTF-32BE");
269         _testHttpLenient("text/xml;charset=UTF-32", "no-bom", "UTF-32BE", null, "UTF-32");
270 
271         _testHttpLenient("text/html", "no-bom", "US-ASCII", "US-ASCII", "US-ASCII");
272         _testHttpLenient("text/html", "no-bom", "US-ASCII", null, "US-ASCII");
273         _testHttpLenient("text/html;charset=UTF-8", "no-bom", "US-ASCII", "UTF-8", "UTF-8");
274         _testHttpLenient("text/html;charset=UTF-16BE", "no-bom", "US-ASCII", "UTF-8", "UTF-8");
275         _testHttpLenient("text/html;charset=UTF-32BE", "no-bom", "US-ASCII", "UTF-8", "UTF-8");
276     }
277 
278     @Test
279     public void testRawContent() throws Exception {
280         final String encoding = "UTF-8";
281         final String xml = getXML("no-bom", XML3, encoding, encoding);
282         final ByteArrayInputStream is = new ByteArrayInputStream(xml.getBytes(encoding));
283         final XmlStreamReader xmlReader = new XmlStreamReader(is);
284         assertEquals("Check encoding", xmlReader.getEncoding(), encoding);
285         assertEquals("Check content", xml, IOUtils.toString(xmlReader));
286     }
287 
288     @Test
289     public void testHttpContent() throws Exception {
290         final String encoding = "UTF-8";
291         final String xml = getXML("no-bom", XML3, encoding, encoding);
292         final ByteArrayInputStream is = new ByteArrayInputStream(xml.getBytes(encoding));
293         final XmlStreamReader xmlReader = new XmlStreamReader(is, encoding);
294         assertEquals("Check encoding", xmlReader.getEncoding(), encoding);
295         assertEquals("Check content", xml, IOUtils.toString(xmlReader));
296     }
297 
298     public void _testAlternateDefaultEncoding(final String cT, final String bomEnc,
299                                               final String streamEnc, final String prologEnc, final String alternateEnc)
300             throws Exception {
301         final InputStream is = getXmlStream(bomEnc, prologEnc == null ? XML1
302                 : XML3, streamEnc, prologEnc);
303         final XmlStreamReader xmlReader = new XmlStreamReader(is, cT, false, alternateEnc);
304         if (!streamEnc.equals("UTF-16")) {
305             // we can not assert things here because UTF-8, US-ASCII and
306             // ISO-8859-1 look alike for the chars used for detection
307             // (niallp 2010-10-06 - I re-instated the check below - the tests(6) passed)
308             final String enc = alternateEnc != null ? alternateEnc : streamEnc;
309             assertEquals(xmlReader.getEncoding(), enc);
310         } else {
311             //String enc = (alternateEnc != null) ? alternateEnc : streamEnc;
312             assertEquals(xmlReader.getEncoding().substring(0,
313                     streamEnc.length()), streamEnc);
314         }
315         xmlReader.close();
316     }
317 
318     public void _testHttpValid(final String cT, final String bomEnc, final String streamEnc,
319                                final String prologEnc) throws Exception {
320         final InputStream is = getXmlStream(bomEnc,
321                 prologEnc == null ? XML1 : XML3, streamEnc, prologEnc);
322         final XmlStreamReader xmlReader = new XmlStreamReader(is, cT, false);
323         if (!streamEnc.equals("UTF-16")) {
324             // we can not assert things here because UTF-8, US-ASCII and
325             // ISO-8859-1 look alike for the chars used for detection
326             // (niallp 2010-10-06 - I re-instated the check below and removed the 2 tests that failed)
327             assertEquals(xmlReader.getEncoding(), streamEnc);
328         } else {
329             assertEquals(xmlReader.getEncoding().substring(0,
330                     streamEnc.length()), streamEnc);
331         }
332         xmlReader.close();
333     }
334 
335     protected void _testHttpInvalid(final String cT, final String bomEnc, final String streamEnc,
336                                     final String prologEnc) throws Exception {
337         final InputStream is = getXmlStream(bomEnc,
338                 prologEnc == null ? XML2 : XML3, streamEnc, prologEnc);
339         try {
340             (new XmlStreamReader(is, cT, false)).close();;
341             fail("It should have failed for HTTP Content-type " + cT + ", BOM "
342                     + bomEnc + ", streamEnc " + streamEnc + " and prologEnc "
343                     + prologEnc);
344         } catch (final IOException ex) {
345             assertTrue(ex.getMessage().contains("Invalid encoding,"));
346         }
347     }
348 
349     protected void _testHttpLenient(final String cT, final String bomEnc, final String streamEnc,
350                                     final String prologEnc, final String shouldbe) throws Exception {
351         final InputStream is = getXmlStream(bomEnc,
352                 prologEnc == null ? XML2 : XML3, streamEnc, prologEnc);
353         final XmlStreamReader xmlReader = new XmlStreamReader(is, cT, true);
354         assertEquals(xmlReader.getEncoding(), shouldbe);
355         xmlReader.close();
356     }
357 
358     private static final String ENCODING_ATTRIBUTE_XML = "<?xml version=\"1.0\" ?> \n"
359             + "<atom:feed xmlns:atom=\"http://www.w3.org/2005/Atom\">\n"
360             + "\n"
361             + "  <atom:entry>\n"
362             + "    <atom:title encoding='base64'><![CDATA\n"
363             + "aW5nTGluZSIgLz4";
364 
365     @Test
366     public void testEncodingAttributeXML() throws Exception {
367         final InputStream is = new ByteArrayInputStream(ENCODING_ATTRIBUTE_XML
368                 .getBytes("UTF-8"));
369         final XmlStreamReader xmlReader = new XmlStreamReader(is, "", true);
370         assertEquals(xmlReader.getEncoding(), "UTF-8");
371         xmlReader.close();
372     }
373 
374     // XML Stream generator
375 
376     private static final int[] NO_BOM_BYTES = {};
377     private static final int[] UTF_16BE_BOM_BYTES = {0xFE, 0xFF};
378     private static final int[] UTF_16LE_BOM_BYTES = {0xFF, 0XFE};
379     private static final int[] UTF_32BE_BOM_BYTES = {0x00, 0x00, 0xFE, 0xFF};
380     private static final int[] UTF_32LE_BOM_BYTES = {0xFF, 0XFE, 0x00, 0x00};
381     private static final int[] UTF_8_BOM_BYTES = {0xEF, 0xBB, 0xBF};
382 
383     private static final Map<String, int[]> BOMs = new HashMap<String, int[]>();
384 
385     static {
386         BOMs.put("no-bom", NO_BOM_BYTES);
387         BOMs.put("UTF-16BE-bom", UTF_16BE_BOM_BYTES);
388         BOMs.put("UTF-16LE-bom", UTF_16LE_BOM_BYTES);
389         BOMs.put("UTF-32BE-bom", UTF_32BE_BOM_BYTES);
390         BOMs.put("UTF-32LE-bom", UTF_32LE_BOM_BYTES);
391         BOMs.put("UTF-16-bom", NO_BOM_BYTES); // it's added by the writer
392         BOMs.put("UTF-8-bom", UTF_8_BOM_BYTES);
393     }
394 
395     private static final MessageFormat XML = new MessageFormat(
396             "<root>{2}</root>");
397     private static final MessageFormat XML_WITH_PROLOG = new MessageFormat(
398             "<?xml version=\"1.0\"?>\n<root>{2}</root>");
399     private static final MessageFormat XML_WITH_PROLOG_AND_ENCODING_DOUBLE_QUOTES = new MessageFormat(
400             "<?xml version=\"1.0\" encoding=\"{1}\"?>\n<root>{2}</root>");
401     private static final MessageFormat XML_WITH_PROLOG_AND_ENCODING_SINGLE_QUOTES = new MessageFormat(
402             "<?xml version=\"1.0\" encoding=''{1}''?>\n<root>{2}</root>");
403     private static final MessageFormat XML_WITH_PROLOG_AND_ENCODING_SPACED_SINGLE_QUOTES = new MessageFormat(
404             "<?xml version=\"1.0\" encoding =  \t \n \r''{1}''?>\n<root>{2}</root>");
405 
406     private static final MessageFormat INFO = new MessageFormat(
407             "\nBOM : {0}\nDoc : {1}\nStream Enc : {2}\nProlog Enc : {3}\n");
408 
409     private static final Map<String, MessageFormat> XMLs = new HashMap<String, MessageFormat>();
410 
411     static {
412         XMLs.put(XML1, XML);
413         XMLs.put(XML2, XML_WITH_PROLOG);
414         XMLs.put(XML3, XML_WITH_PROLOG_AND_ENCODING_DOUBLE_QUOTES);
415         XMLs.put(XML4, XML_WITH_PROLOG_AND_ENCODING_SINGLE_QUOTES);
416         XMLs.put(XML5, XML_WITH_PROLOG_AND_ENCODING_SPACED_SINGLE_QUOTES);
417     }
418 
419     /**
420      * @param bomType   no-bom, UTF-16BE-bom, UTF-16LE-bom, UTF-8-bom
421      * @param xmlType   xml, xml-prolog, xml-prolog-charset
422      * @param streamEnc encoding of the stream
423      * @param prologEnc encoding of the prolog
424      * @return XML stream
425      * @throws IOException If an I/O error occurs
426      */
427     protected InputStream getXmlStream(final String bomType, final String xmlType,
428                                        final String streamEnc, final String prologEnc) throws IOException {
429         final ByteArrayOutputStream baos = new ByteArrayOutputStream(1024);
430         int[] bom = BOMs.get(bomType);
431         if (bom == null) {
432             bom = new int[0];
433         }
434         for (final int element : bom) {
435             baos.write(element);
436         }
437         final Writer writer = new OutputStreamWriter(baos, streamEnc);
438         final String xmlDoc = getXML(bomType, xmlType, streamEnc, prologEnc);
439         writer.write(xmlDoc);
440 
441         // PADDDING TO TEST THINGS WORK BEYOND PUSHBACK_SIZE
442         writer.write("<da>\n");
443         for (int i = 0; i < 10000; i++) {
444             writer.write("<do/>\n");
445         }
446         writer.write("</da>\n");
447 
448         writer.close();
449         return new ByteArrayInputStream(baos.toByteArray());
450     }
451 
452     /**
453      * Create the XML.
454      */
455     private String getXML(final String bomType, final String xmlType,
456                           final String streamEnc, final String prologEnc) {
457         final MessageFormat xml = XMLs.get(xmlType);
458         final String info = INFO.format(new Object[]{bomType, xmlType, prologEnc});
459         final String xmlDoc = xml.format(new Object[]{streamEnc, prologEnc, info});
460         return xmlDoc;
461     }
462 }