001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.io.input;
018
019import static org.junit.Assert.assertEquals;
020import static org.junit.Assert.assertTrue;
021import static org.junit.Assert.fail;
022
023import java.io.ByteArrayInputStream;
024import java.io.ByteArrayOutputStream;
025import java.io.IOException;
026import java.io.InputStream;
027import java.io.OutputStreamWriter;
028import java.io.Writer;
029import java.text.MessageFormat;
030import java.util.HashMap;
031import java.util.Map;
032
033import org.apache.commons.io.IOUtils;
034import org.junit.Test;
035
036/**
037 */
038public class XmlStreamReaderTest {
039    private static final String XML5 = "xml-prolog-encoding-spaced-single-quotes";
040    private static final String XML4 = "xml-prolog-encoding-single-quotes";
041    private static final String XML3 = "xml-prolog-encoding-double-quotes";
042    private static final String XML2 = "xml-prolog";
043    private static final String XML1 = "xml";
044
045    protected void _testRawNoBomValid(final String encoding) throws Exception {
046        InputStream is = getXmlStream("no-bom", XML1, encoding, encoding);
047        XmlStreamReader xmlReader = new XmlStreamReader(is, false);
048        assertEquals(xmlReader.getEncoding(), "UTF-8");
049        xmlReader.close();
050
051        is = getXmlStream("no-bom", XML2, encoding, encoding);
052        xmlReader = new XmlStreamReader(is);
053        assertEquals(xmlReader.getEncoding(), "UTF-8");
054        xmlReader.close();
055
056        is = getXmlStream("no-bom", XML3, encoding, encoding);
057        xmlReader = new XmlStreamReader(is);
058        assertEquals(xmlReader.getEncoding(), encoding);
059        xmlReader.close();
060
061        is = getXmlStream("no-bom", XML4, encoding, encoding);
062        xmlReader = new XmlStreamReader(is);
063        assertEquals(xmlReader.getEncoding(), encoding);
064        xmlReader.close();
065
066        is = getXmlStream("no-bom", XML5, encoding, encoding);
067        xmlReader = new XmlStreamReader(is);
068        assertEquals(xmlReader.getEncoding(), encoding);
069        xmlReader.close();
070    }
071
072    protected void _testRawNoBomInvalid(final String encoding) throws Exception {
073        final InputStream is = getXmlStream("no-bom", XML3, encoding, encoding);
074        try {
075            new XmlStreamReader(is, false);
076            fail("It should have failed");
077        } catch (final IOException ex) {
078            assertTrue(ex.getMessage().indexOf("Invalid encoding,") > -1);
079        }
080    }
081
082    @Test
083    public void testRawNoBomUsAscii() throws Exception {
084        _testRawNoBomValid("US-ASCII");
085    }
086
087    @Test
088    public void testRawNoBomUtf8() throws Exception {
089        _testRawNoBomValid("UTF-8");
090    }
091
092    @Test
093    public void testRawNoBomUtf16BE() throws Exception {
094        _testRawNoBomValid("UTF-16BE");
095    }
096
097    @Test
098    public void testRawNoBomUtf16LE() throws Exception {
099        _testRawNoBomValid("UTF-16LE");
100    }
101
102    @Test
103    public void testRawNoBomUtf32BE() throws Exception {
104        _testRawNoBomValid("UTF-32BE");
105    }
106
107    @Test
108    public void testRawNoBomUtf32LE() throws Exception {
109        _testRawNoBomValid("UTF-32LE");
110    }
111
112    @Test
113    public void testRawNoBomIso8859_1() throws Exception {
114        _testRawNoBomValid("ISO-8859-1");
115    }
116
117    @Test
118    public void testRawNoBomCp1047() throws Exception {
119        _testRawNoBomValid("CP1047");
120    }
121
122    protected void _testRawBomValid(final String encoding) throws Exception {
123        final InputStream is = getXmlStream(encoding + "-bom", XML3, encoding,
124                encoding);
125        final XmlStreamReader xmlReader = new XmlStreamReader(is, false);
126        if (!encoding.equals("UTF-16") && !encoding.equals("UTF-32")) {
127            assertEquals(xmlReader.getEncoding(), encoding);
128        } else {
129            assertEquals(xmlReader.getEncoding()
130                    .substring(0, encoding.length()), encoding);
131        }
132        xmlReader.close();
133    }
134
135    protected void _testRawBomInvalid(final String bomEnc, final String streamEnc,
136            final String prologEnc) throws Exception {
137        final InputStream is = getXmlStream(bomEnc, XML3, streamEnc, prologEnc);
138        XmlStreamReader xmlReader = null;
139        try {
140            xmlReader = new XmlStreamReader(is, false);
141            final String foundEnc = xmlReader.getEncoding();
142            fail("Expected IOException for BOM " + bomEnc + ", streamEnc "
143                    + streamEnc + " and prologEnc " + prologEnc + ": found "
144                    + foundEnc);
145        } catch (final IOException ex) {
146            assertTrue(ex.getMessage().indexOf("Invalid encoding,") > -1);
147        }
148        if (xmlReader != null) {
149            xmlReader.close();
150        }
151    }
152
153    @Test
154    public void testRawBomUtf8() throws Exception {
155        _testRawBomValid("UTF-8");
156        _testRawBomInvalid("UTF-8-bom", "US-ASCII", "US-ASCII");
157        _testRawBomInvalid("UTF-8-bom", "ISO-8859-1", "ISO-8859-1");
158        _testRawBomInvalid("UTF-8-bom", "UTF-8", "UTF-16");
159        _testRawBomInvalid("UTF-8-bom", "UTF-8", "UTF-16BE");
160        _testRawBomInvalid("UTF-8-bom", "UTF-8", "UTF-16LE");
161        _testRawBomInvalid("UTF-16BE-bom", "UTF-16BE", "UTF-16LE");
162        _testRawBomInvalid("UTF-16LE-bom", "UTF-16LE", "UTF-16BE");
163        _testRawBomInvalid("UTF-16LE-bom", "UTF-16LE", "UTF-8");
164        _testRawBomInvalid("UTF-32BE-bom", "UTF-32BE", "UTF-32LE");
165        _testRawBomInvalid("UTF-32LE-bom", "UTF-32LE", "UTF-32BE");
166        _testRawBomInvalid("UTF-32LE-bom", "UTF-32LE", "UTF-8");
167    }
168
169    @Test
170    public void testRawBomUtf16() throws Exception {
171        _testRawBomValid("UTF-16BE");
172        _testRawBomValid("UTF-16LE");
173        _testRawBomValid("UTF-16");
174
175        _testRawBomInvalid("UTF-16BE-bom", "UTF-16BE", "UTF-16LE");
176        _testRawBomInvalid("UTF-16LE-bom", "UTF-16LE", "UTF-16BE");
177        _testRawBomInvalid("UTF-16LE-bom", "UTF-16LE", "UTF-8");
178    }
179
180    @Test
181    public void testRawBomUtf32() throws Exception {
182        _testRawBomValid("UTF-32BE");
183        _testRawBomValid("UTF-32LE");
184        _testRawBomValid("UTF-32");
185
186        _testRawBomInvalid("UTF-32BE-bom", "UTF-32BE", "UTF-32LE");
187        _testRawBomInvalid("UTF-32LE-bom", "UTF-32LE", "UTF-32BE");
188        _testRawBomInvalid("UTF-32LE-bom", "UTF-32LE", "UTF-8");
189}
190
191
192    @Test
193    public void testHttp() throws Exception {
194        // niallp 2010-10-06 - remove following 2 tests - I reinstated
195        // checks for non-UTF-16 encodings (18 tests) and these failed
196        // _testHttpValid("application/xml", "no-bom", "US-ASCII", null);
197        // _testHttpValid("application/xml", "UTF-8-bom", "US-ASCII", null);
198        _testHttpValid("application/xml", "UTF-8-bom", "UTF-8", null);
199        _testHttpValid("application/xml", "UTF-8-bom", "UTF-8", "UTF-8");
200        _testHttpValid("application/xml;charset=UTF-8", "UTF-8-bom", "UTF-8", null);
201        _testHttpValid("application/xml;charset=\"UTF-8\"", "UTF-8-bom", "UTF-8", null);
202        _testHttpValid("application/xml;charset='UTF-8'", "UTF-8-bom", "UTF-8", null);
203        _testHttpValid("application/xml;charset=UTF-8", "UTF-8-bom", "UTF-8", "UTF-8");
204        _testHttpValid("application/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", null);
205        _testHttpValid("application/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", "UTF-16");
206        _testHttpValid("application/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", "UTF-16BE");
207
208        _testHttpInvalid("application/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", null);
209        _testHttpInvalid("application/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", "UTF-16");
210        _testHttpInvalid("application/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", "UTF-16BE");
211
212        _testHttpInvalid("application/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", null);
213        _testHttpInvalid("application/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", "UTF-32");
214        _testHttpInvalid("application/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", "UTF-32BE");
215
216        _testHttpInvalid("application/xml", "UTF-8-bom", "US-ASCII", "US-ASCII");
217        _testHttpInvalid("application/xml;charset=UTF-16", "UTF-16LE", "UTF-8", "UTF-8");
218        _testHttpInvalid("application/xml;charset=UTF-16", "no-bom", "UTF-16BE", "UTF-16BE");
219        _testHttpInvalid("application/xml;charset=UTF-32", "UTF-32LE", "UTF-8", "UTF-8");
220        _testHttpInvalid("application/xml;charset=UTF-32", "no-bom", "UTF-32BE", "UTF-32BE");
221
222        _testHttpValid("text/xml", "no-bom", "US-ASCII", null);
223        _testHttpValid("text/xml;charset=UTF-8", "UTF-8-bom", "UTF-8", "UTF-8");
224        _testHttpValid("text/xml;charset=UTF-8", "UTF-8-bom", "UTF-8", null);
225        _testHttpValid("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", null);
226        _testHttpValid("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", "UTF-16");
227        _testHttpValid("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", "UTF-16BE");
228        _testHttpValid("text/xml;charset=UTF-32", "UTF-32BE-bom", "UTF-32BE", null);
229        _testHttpValid("text/xml;charset=UTF-32", "UTF-32BE-bom", "UTF-32BE", "UTF-32");
230        _testHttpValid("text/xml;charset=UTF-32", "UTF-32BE-bom", "UTF-32BE", "UTF-32BE");
231        _testHttpValid("text/xml", "UTF-8-bom", "US-ASCII", null);
232
233        _testAlternateDefaultEncoding("application/xml", "UTF-8-bom", "UTF-8", null, null);
234        _testAlternateDefaultEncoding("application/xml", "no-bom", "US-ASCII", null, "US-ASCII");
235        _testAlternateDefaultEncoding("application/xml", "UTF-8-bom", "UTF-8", null, "UTF-8");
236        _testAlternateDefaultEncoding("text/xml", "no-bom", "US-ASCII", null, null);
237        _testAlternateDefaultEncoding("text/xml", "no-bom", "US-ASCII", null, "US-ASCII");
238        _testAlternateDefaultEncoding("text/xml", "no-bom", "US-ASCII", null, "UTF-8");
239
240        _testHttpInvalid("text/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", null);
241        _testHttpInvalid("text/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", "UTF-16");
242        _testHttpInvalid("text/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", "UTF-16BE");
243        _testHttpInvalid("text/xml;charset=UTF-16", "no-bom", "UTF-16BE", "UTF-16BE");
244        _testHttpInvalid("text/xml;charset=UTF-16", "no-bom", "UTF-16BE", null);
245
246        _testHttpInvalid("text/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", null);
247        _testHttpInvalid("text/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", "UTF-32");
248        _testHttpInvalid("text/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", "UTF-32BE");
249        _testHttpInvalid("text/xml;charset=UTF-32", "no-bom", "UTF-32BE", "UTF-32BE");
250        _testHttpInvalid("text/xml;charset=UTF-32", "no-bom", "UTF-32BE", null);
251
252        _testHttpLenient("text/xml", "no-bom", "US-ASCII", null, "US-ASCII");
253        _testHttpLenient("text/xml;charset=UTF-8", "UTF-8-bom", "UTF-8", "UTF-8", "UTF-8");
254        _testHttpLenient("text/xml;charset=UTF-8", "UTF-8-bom", "UTF-8", null, "UTF-8");
255        _testHttpLenient("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", null, "UTF-16BE");
256        _testHttpLenient("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", "UTF-16", "UTF-16");
257        _testHttpLenient("text/xml;charset=UTF-16", "UTF-16BE-bom", "UTF-16BE", "UTF-16BE", "UTF-16BE");
258        _testHttpLenient("text/xml;charset=UTF-32", "UTF-32BE-bom", "UTF-32BE", null, "UTF-32BE");
259        _testHttpLenient("text/xml;charset=UTF-32", "UTF-32BE-bom", "UTF-32BE", "UTF-32", "UTF-32");
260        _testHttpLenient("text/xml;charset=UTF-32", "UTF-32BE-bom", "UTF-32BE", "UTF-32BE", "UTF-32BE");
261        _testHttpLenient("text/xml", "UTF-8-bom", "US-ASCII", null, "US-ASCII");
262
263        _testHttpLenient("text/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", null, "UTF-16BE");
264        _testHttpLenient("text/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", "UTF-16", "UTF-16");
265        _testHttpLenient("text/xml;charset=UTF-16BE", "UTF-16BE-bom", "UTF-16BE", "UTF-16BE", "UTF-16BE");
266        _testHttpLenient("text/xml;charset=UTF-16", "no-bom", "UTF-16BE", "UTF-16BE", "UTF-16BE");
267        _testHttpLenient("text/xml;charset=UTF-16", "no-bom", "UTF-16BE", null, "UTF-16");
268
269        _testHttpLenient("text/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", null, "UTF-32BE");
270        _testHttpLenient("text/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", "UTF-32", "UTF-32");
271        _testHttpLenient("text/xml;charset=UTF-32BE", "UTF-32BE-bom", "UTF-32BE", "UTF-32BE", "UTF-32BE");
272        _testHttpLenient("text/xml;charset=UTF-32", "no-bom", "UTF-32BE", "UTF-32BE", "UTF-32BE");
273        _testHttpLenient("text/xml;charset=UTF-32", "no-bom", "UTF-32BE", null, "UTF-32");
274
275        _testHttpLenient("text/html", "no-bom", "US-ASCII", "US-ASCII", "US-ASCII");
276        _testHttpLenient("text/html", "no-bom", "US-ASCII", null, "US-ASCII");
277        _testHttpLenient("text/html;charset=UTF-8", "no-bom", "US-ASCII", "UTF-8", "UTF-8");
278        _testHttpLenient("text/html;charset=UTF-16BE", "no-bom", "US-ASCII", "UTF-8", "UTF-8");
279        _testHttpLenient("text/html;charset=UTF-32BE", "no-bom", "US-ASCII", "UTF-8", "UTF-8");
280    }
281
282    @Test
283    public void testRawContent() throws Exception {
284        final String encoding = "UTF-8";
285        final String xml = getXML("no-bom", XML3, encoding, encoding);
286        final ByteArrayInputStream is = new ByteArrayInputStream(xml.getBytes(encoding));
287        final XmlStreamReader xmlReader = new XmlStreamReader(is);
288        assertEquals("Check encoding", xmlReader.getEncoding(), encoding);
289        assertEquals("Check content", xml, IOUtils.toString(xmlReader));
290    }
291
292    @Test
293    public void testHttpContent() throws Exception {
294        final String encoding = "UTF-8";
295        final String xml = getXML("no-bom", XML3, encoding, encoding);
296        final ByteArrayInputStream is = new ByteArrayInputStream(xml.getBytes(encoding));
297        final XmlStreamReader xmlReader = new XmlStreamReader(is, encoding);
298        assertEquals("Check encoding", xmlReader.getEncoding(), encoding);
299        assertEquals("Check content", xml, IOUtils.toString(xmlReader));
300    }
301
302    public void _testAlternateDefaultEncoding(final String cT, final String bomEnc,
303            final String streamEnc, final String prologEnc, final String alternateEnc)
304            throws Exception {
305        final InputStream is = getXmlStream(bomEnc, prologEnc == null ? XML1
306                : XML3, streamEnc, prologEnc);
307        final XmlStreamReader xmlReader = new XmlStreamReader(is, cT, false, alternateEnc);
308        if (!streamEnc.equals("UTF-16")) {
309            // we can not assert things here because UTF-8, US-ASCII and
310            // ISO-8859-1 look alike for the chars used for detection
311            // (niallp 2010-10-06 - I re-instated the check below - the tests(6) passed)
312            final String enc = alternateEnc != null ? alternateEnc : streamEnc;
313            assertEquals(xmlReader.getEncoding(), enc);
314        } else {
315            //String enc = (alternateEnc != null) ? alternateEnc : streamEnc;
316            assertEquals(xmlReader.getEncoding().substring(0,
317                    streamEnc.length()), streamEnc);
318        }
319        xmlReader.close();
320    }
321
322    public void _testHttpValid(final String cT, final String bomEnc, final String streamEnc,
323            final String prologEnc) throws Exception {
324        final InputStream is = getXmlStream(bomEnc,
325                prologEnc == null ? XML1 : XML3, streamEnc, prologEnc);
326        final XmlStreamReader xmlReader = new XmlStreamReader(is, cT, false);
327        if (!streamEnc.equals("UTF-16")) {
328            // we can not assert things here because UTF-8, US-ASCII and
329            // ISO-8859-1 look alike for the chars used for detection
330            // (niallp 2010-10-06 - I re-instated the check below and removed the 2 tests that failed)
331            assertEquals(xmlReader.getEncoding(), streamEnc);
332        } else {
333            assertEquals(xmlReader.getEncoding().substring(0,
334                    streamEnc.length()), streamEnc);
335        }
336        xmlReader.close();
337    }
338
339    protected void _testHttpInvalid(final String cT, final String bomEnc, final String streamEnc,
340            final String prologEnc) throws Exception {
341        final InputStream is = getXmlStream(bomEnc,
342                prologEnc == null ? XML2 : XML3, streamEnc, prologEnc);
343        try {
344            new XmlStreamReader(is, cT, false);
345            fail("It should have failed for HTTP Content-type " + cT + ", BOM "
346                    + bomEnc + ", streamEnc " + streamEnc + " and prologEnc "
347                    + prologEnc);
348        } catch (final IOException ex) {
349            assertTrue(ex.getMessage().indexOf("Invalid encoding,") > -1);
350        }
351    }
352
353    protected void _testHttpLenient(final String cT, final String bomEnc, final String streamEnc,
354            final String prologEnc, final String shouldbe) throws Exception {
355        final InputStream is = getXmlStream(bomEnc,
356                prologEnc == null ? XML2 : XML3, streamEnc, prologEnc);
357        final XmlStreamReader xmlReader = new XmlStreamReader(is, cT, true);
358        assertEquals(xmlReader.getEncoding(), shouldbe);
359        xmlReader.close();
360    }
361
362    private static final String ENCODING_ATTRIBUTE_XML = "<?xml version=\"1.0\" ?> \n"
363            + "<atom:feed xmlns:atom=\"http://www.w3.org/2005/Atom\">\n"
364            + "\n"
365            + "  <atom:entry>\n"
366            + "    <atom:title encoding='base64'><![CDATA\n"
367            + "aW5nTGluZSIgLz4";
368
369    @Test
370    public void testEncodingAttributeXML() throws Exception {
371        final InputStream is = new ByteArrayInputStream(ENCODING_ATTRIBUTE_XML
372                .getBytes("UTF-8"));
373        final XmlStreamReader xmlReader = new XmlStreamReader(is, "", true);
374        assertEquals(xmlReader.getEncoding(), "UTF-8");
375        xmlReader.close();
376    }
377
378    // XML Stream generator
379
380    private static final int[] NO_BOM_BYTES = {};
381    private static final int[] UTF_16BE_BOM_BYTES = { 0xFE, 0xFF };
382    private static final int[] UTF_16LE_BOM_BYTES = { 0xFF, 0XFE };
383    private static final int[] UTF_32BE_BOM_BYTES = { 0x00, 0x00, 0xFE, 0xFF };
384    private static final int[] UTF_32LE_BOM_BYTES = { 0xFF, 0XFE, 0x00, 0x00 };
385    private static final int[] UTF_8_BOM_BYTES = { 0xEF, 0xBB, 0xBF };
386
387    private static final Map<String, int[]> BOMs = new HashMap<String, int[]>();
388
389    static {
390        BOMs.put("no-bom", NO_BOM_BYTES);
391        BOMs.put("UTF-16BE-bom", UTF_16BE_BOM_BYTES);
392        BOMs.put("UTF-16LE-bom", UTF_16LE_BOM_BYTES);
393        BOMs.put("UTF-32BE-bom", UTF_32BE_BOM_BYTES);
394        BOMs.put("UTF-32LE-bom", UTF_32LE_BOM_BYTES);
395        BOMs.put("UTF-16-bom", NO_BOM_BYTES); // it's added by the writer
396        BOMs.put("UTF-8-bom", UTF_8_BOM_BYTES);
397    }
398
399    private static final MessageFormat XML = new MessageFormat(
400            "<root>{2}</root>");
401    private static final MessageFormat XML_WITH_PROLOG = new MessageFormat(
402            "<?xml version=\"1.0\"?>\n<root>{2}</root>");
403    private static final MessageFormat XML_WITH_PROLOG_AND_ENCODING_DOUBLE_QUOTES = new MessageFormat(
404            "<?xml version=\"1.0\" encoding=\"{1}\"?>\n<root>{2}</root>");
405    private static final MessageFormat XML_WITH_PROLOG_AND_ENCODING_SINGLE_QUOTES = new MessageFormat(
406            "<?xml version=\"1.0\" encoding=''{1}''?>\n<root>{2}</root>");
407    private static final MessageFormat XML_WITH_PROLOG_AND_ENCODING_SPACED_SINGLE_QUOTES = new MessageFormat(
408            "<?xml version=\"1.0\" encoding =  \t \n \r''{1}''?>\n<root>{2}</root>");
409
410    private static final MessageFormat INFO = new MessageFormat(
411            "\nBOM : {0}\nDoc : {1}\nStream Enc : {2}\nProlog Enc : {3}\n");
412
413    private static final Map<String,MessageFormat> XMLs = new HashMap<String,MessageFormat>();
414
415    static {
416        XMLs.put(XML1, XML);
417        XMLs.put(XML2, XML_WITH_PROLOG);
418        XMLs.put(XML3, XML_WITH_PROLOG_AND_ENCODING_DOUBLE_QUOTES);
419        XMLs.put(XML4, XML_WITH_PROLOG_AND_ENCODING_SINGLE_QUOTES);
420        XMLs.put(XML5, XML_WITH_PROLOG_AND_ENCODING_SPACED_SINGLE_QUOTES);
421    }
422
423    /**
424     *
425     * @param bomType no-bom, UTF-16BE-bom, UTF-16LE-bom, UTF-8-bom
426     * @param xmlType xml, xml-prolog, xml-prolog-charset
427     * @return XML stream
428     */
429    protected InputStream getXmlStream(final String bomType, final String xmlType,
430            final String streamEnc, final String prologEnc) throws IOException {
431        final ByteArrayOutputStream baos = new ByteArrayOutputStream(1024);
432        int[] bom = BOMs.get(bomType);
433        if (bom == null) {
434            bom = new int[0];
435        }
436        for (final int element : bom) {
437            baos.write(element);
438        }
439        final Writer writer = new OutputStreamWriter(baos, streamEnc);
440        final String xmlDoc = getXML(bomType, xmlType, streamEnc, prologEnc);
441        writer.write(xmlDoc);
442
443        // PADDDING TO TEST THINGS WORK BEYOND PUSHBACK_SIZE
444        writer.write("<da>\n");
445        for (int i = 0; i < 10000; i++) {
446            writer.write("<do/>\n");
447        }
448        writer.write("</da>\n");
449
450        writer.close();
451        return new ByteArrayInputStream(baos.toByteArray());
452    }
453
454    /**
455     * Create the XML.
456     */
457    private String getXML(final String bomType, final String xmlType,
458            final String streamEnc, final String prologEnc) {
459        final MessageFormat xml = XMLs.get(xmlType);
460        final String info = INFO.format(new Object[] { bomType, xmlType, prologEnc });
461        final String xmlDoc = xml.format(new Object[] { streamEnc, prologEnc, info });
462        return xmlDoc;
463    }
464}