View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.io.input;
18  
19  import static org.junit.jupiter.api.Assertions.assertEquals;
20  import static org.junit.jupiter.api.Assertions.assertThrows;
21  import static org.junit.jupiter.api.Assertions.assertTrue;
22  import static org.junit.jupiter.api.Assertions.fail;
23  
24  import java.io.ByteArrayInputStream;
25  import java.io.ByteArrayOutputStream;
26  import java.io.File;
27  import java.io.IOException;
28  import java.io.InputStream;
29  import java.io.OutputStreamWriter;
30  import java.io.Writer;
31  import java.net.URL;
32  import java.net.URLConnection;
33  import java.nio.charset.Charset;
34  import java.nio.charset.StandardCharsets;
35  import java.nio.file.Files;
36  import java.nio.file.Path;
37  import java.nio.file.Paths;
38  import java.nio.file.StandardOpenOption;
39  import java.text.MessageFormat;
40  import java.util.HashMap;
41  import java.util.Locale;
42  import java.util.Map;
43  
44  import org.apache.commons.io.CharsetsTest;
45  import org.apache.commons.io.IOUtils;
46  import org.apache.commons.io.function.IOFunction;
47  import org.junit.jupiter.api.Test;
48  import org.junit.jupiter.params.ParameterizedTest;
49  import org.junit.jupiter.params.provider.MethodSource;
50  import org.junitpioneer.jupiter.DefaultLocale;
51  
52  /**
53   * Tests {@link XmlStreamReader}.
54   */
55  public class XmlStreamReaderTest {
56  
57      private static final String ISO_8859_1 = StandardCharsets.ISO_8859_1.name();
58      private static final String US_ASCII = StandardCharsets.US_ASCII.name();
59      private static final String UTF_16 = StandardCharsets.UTF_16.name();
60      private static final String UTF_16LE = StandardCharsets.UTF_16LE.name();
61      private static final String UTF_16BE = StandardCharsets.UTF_16BE.name();
62      private static final String UTF_32 = "UTF-32";
63      private static final String UTF_32LE = "UTF-32LE";
64      private static final String UTF_32BE = "UTF-32BE";
65      private static final String UTF_8 = StandardCharsets.UTF_8.name();
66  
67      private static final String XML7 = "xml-prolog-encoding-no-version";
68      private static final String XML6 = "xml-prolog-encoding-new-line";
69      private static final String XML5 = "xml-prolog-encoding-spaced-single-quotes";
70      private static final String XML4 = "xml-prolog-encoding-single-quotes";
71      private static final String XML3 = "xml-prolog-encoding-double-quotes";
72      private static final String XML2 = "xml-prolog";
73      private static final String XML1 = "xml";
74  
75      private static final String ENCODING_ATTRIBUTE_XML = "<?xml version=\"1.0\" ?> \n"
76              + "<atom:feed xmlns:atom=\"http://www.w3.org/2005/Atom\">\n"
77              + "\n"
78              + "  <atom:entry>\n"
79              + "    <atom:title encoding='base64'><![CDATA\n"
80              + "aW5nTGluZSIgLz4";
81  
82      private static final int[] NO_BOM_BYTES = {};
83  
84      private static final int[] UTF_16BE_BOM_BYTES = {0xFE, 0xFF};
85  
86      private static final int[] UTF_16LE_BOM_BYTES = {0xFF, 0XFE};
87  
88      private static final int[] UTF_32BE_BOM_BYTES = {0x00, 0x00, 0xFE, 0xFF};
89  
90      private static final int[] UTF_32LE_BOM_BYTES = {0xFF, 0XFE, 0x00, 0x00};
91  
92      private static final int[] UTF_8_BOM_BYTES = {0xEF, 0xBB, 0xBF};
93  
94      private static final Map<String, int[]> BOMs = new HashMap<>();
95  
96      static {
97          BOMs.put("no-bom", NO_BOM_BYTES);
98          BOMs.put("UTF-16BE-bom", UTF_16BE_BOM_BYTES);
99          BOMs.put("UTF-16LE-bom", UTF_16LE_BOM_BYTES);
100         BOMs.put("UTF-32BE-bom", UTF_32BE_BOM_BYTES);
101         BOMs.put("UTF-32LE-bom", UTF_32LE_BOM_BYTES);
102         BOMs.put("UTF-16-bom", NO_BOM_BYTES); // it's added by the writer
103         BOMs.put("UTF-8-bom", UTF_8_BOM_BYTES);
104     }
105 
106     private static final MessageFormat XML = new MessageFormat(
107             "<root>{2}</root>");
108 
109     private static final MessageFormat XML_WITH_PROLOG = new MessageFormat(
110             "<?xml version=\"1.0\"?>\n<root>{2}</root>");
111 
112     private static final MessageFormat XML_WITH_PROLOG_AND_ENCODING_NEW_LINES = new MessageFormat(
113             "<?xml\nversion\n=\n\"1.0\"\nencoding\n=\n\"{1}\"\n?>\n<root>{2}</root>");
114 
115     private static final MessageFormat XML_EXTERNAL_PARSED_ENTITY_NO_VERSION = new MessageFormat(
116             "<?xml\nencoding\n=\n\"{1}\"\n?>\n<root>{2}</root>");
117 
118     private static final MessageFormat XML_WITH_PROLOG_AND_ENCODING_DOUBLE_QUOTES = new MessageFormat(
119             "<?xml version=\"1.0\" encoding=\"{1}\"?>\n<root>{2}</root>");
120 
121     private static final MessageFormat XML_WITH_PROLOG_AND_ENCODING_SINGLE_QUOTES = new MessageFormat(
122             "<?xml version=\"1.0\" encoding=''{1}''?>\n<root>{2}</root>");
123 
124     private static final MessageFormat XML_WITH_PROLOG_AND_ENCODING_SPACED_SINGLE_QUOTES = new MessageFormat(
125             "<?xml version=\"1.0\" encoding =  \t \n \r''{1}''?>\n<root>{2}</root>");
126 
127     private static final MessageFormat INFO = new MessageFormat(
128             "\nBOM : {0}\nDoc : {1}\nStream Enc : {2}\nProlog Enc : {3}\n");
129 
130     private static final Map<String, MessageFormat> XMLs = new HashMap<>();
131 
132     static {
133         XMLs.put(XML1, XML);
134         XMLs.put(XML2, XML_WITH_PROLOG);
135         XMLs.put(XML3, XML_WITH_PROLOG_AND_ENCODING_DOUBLE_QUOTES);
136         XMLs.put(XML4, XML_WITH_PROLOG_AND_ENCODING_SINGLE_QUOTES);
137         XMLs.put(XML5, XML_WITH_PROLOG_AND_ENCODING_SPACED_SINGLE_QUOTES);
138         XMLs.put(XML6, XML_WITH_PROLOG_AND_ENCODING_NEW_LINES);
139         XMLs.put(XML7, XML_EXTERNAL_PARSED_ENTITY_NO_VERSION);
140     }
141 
142     /**
143      * Create the XML.
144      */
145     private String getXML(final String bomType, final String xmlType,
146                           final String streamEnc, final String prologEnc) {
147         final MessageFormat xml = XMLs.get(xmlType);
148         final String info = INFO.format(new Object[]{bomType, xmlType, prologEnc});
149         return xml.format(new Object[]{streamEnc, prologEnc, info});
150     }
151 
152     /**
153      * @param bomType   no-bom, UTF-16BE-bom, UTF-16LE-bom, UTF-8-bom
154      * @param xmlType   xml, xml-prolog, xml-prolog-charset
155      * @param streamEnc encoding of the stream
156      * @param prologEnc encoding of the prolog
157      * @return XML stream
158      * @throws IOException If an I/O error occurs
159      */
160     protected InputStream getXmlInputStream(final String bomType, final String xmlType,
161         final String streamEnc, final String prologEnc) throws IOException {
162         final ByteArrayOutputStream baos = new ByteArrayOutputStream(1024);
163         int[] bom = BOMs.get(bomType);
164         if (bom == null) {
165             bom = new int[0];
166         }
167         for (final int element : bom) {
168             baos.write(element);
169         }
170         try (Writer writer = new OutputStreamWriter(baos, streamEnc)) {
171             final String xmlDoc = getXML(bomType, xmlType, streamEnc, prologEnc);
172             writer.write(xmlDoc);
173 
174             // PADDING TO TEST THINGS WORK BEYOND PUSHBACK_SIZE
175             writer.write("<da>\n");
176             for (int i = 0; i < 10000; i++) {
177                 writer.write("<do/>\n");
178             }
179             writer.write("</da>\n");
180 
181         }
182         return new ByteArrayInputStream(baos.toByteArray());
183     }
184 
185     private void parseCharset(final String hdr, final String enc, final IOFunction<InputStream, XmlStreamReader> factory) throws Exception {
186         try (final InputStream stream = new ByteArrayInputStream(hdr.getBytes(StandardCharsets.UTF_8))) {
187             try (final XmlStreamReader xml = factory.apply(stream)) {
188                 assertEquals(enc.toUpperCase(Locale.ROOT), xml.getEncoding(), enc);
189             }
190         }
191     }
192 
193     public void testAlternateDefaultEncoding(final String contentType, final String bomEnc, final String streamEnc, final String prologEnc,
194             final String alternateEnc) throws Exception {
195         try (InputStream is = getXmlInputStream(bomEnc, prologEnc == null ? XML1 : XML3, streamEnc, prologEnc);
196                 XmlStreamReader xmlReader = new XmlStreamReader(is, contentType, false, alternateEnc)) {
197             testAlternateDefaultEncoding(streamEnc, alternateEnc, xmlReader);
198         }
199         try (InputStream is = getXmlInputStream(bomEnc, prologEnc == null ? XML1 : XML3, streamEnc, prologEnc);
200         // @formatter:off
201             XmlStreamReader xmlReader = XmlStreamReader.builder()
202                     .setInputStream(is)
203                     .setHttpContentType(contentType)
204                     .setLenient(false)
205                     .setCharset(alternateEnc)
206                     .get()) {
207             // @formatter:on
208             testAlternateDefaultEncoding(streamEnc, alternateEnc, xmlReader);
209         }
210     }
211 
212     private void testAlternateDefaultEncoding(final String streamEnc, final String alternateEnc, final XmlStreamReader xmlReader) {
213         assertEquals(xmlReader.getDefaultEncoding(), alternateEnc);
214         if (!streamEnc.equals(UTF_16)) {
215             // we can not assert things here because UTF-8, US-ASCII and
216             // ISO-8859-1 look alike for the chars used for detection
217             // (niallp 2010-10-06 - I re-instated the check below - the tests(6) passed)
218             final String enc = alternateEnc != null ? alternateEnc : streamEnc;
219             assertEquals(xmlReader.getEncoding(), enc);
220         } else {
221             // String enc = (alternateEnc != null) ? alternateEnc : streamEnc;
222             assertEquals(xmlReader.getEncoding().substring(0, streamEnc.length()), streamEnc);
223         }
224     }
225 
    /**
     * Tests that a reader can be created and closed for the file {@code pom.xml}, once through the {@link File}
     * constructor and once through the builder.
     *
     * @throws IOException if the reader cannot be created or closed
     */
    @Test
    protected void testConstructorFileInput() throws IOException {
        try (XmlStreamReader reader = new XmlStreamReader(new File("pom.xml"))) {
            // do nothing
        }
        try (XmlStreamReader reader = XmlStreamReader.builder().setFile("pom.xml").get()) {
            // do nothing
        }
    }
235 
    /**
     * Tests that the {@link File} constructor throws a {@link NullPointerException} for a {@code null} file.
     */
    @Test
    protected void testConstructorFileInputNull() {
        assertThrows(NullPointerException.class, () -> new XmlStreamReader((File) null));
    }
240 
    /**
     * Tests that a reader can be created and closed for the file {@code pom.xml} when the builder is given
     * {@link StandardOpenOption#READ}.
     *
     * @throws IOException if the reader cannot be created or closed
     */
    @Test
    protected void testConstructorFileInputOpenOptions() throws IOException {
        // NOTE(review): this first block uses the plain File constructor and passes no open options.
        try (XmlStreamReader reader = new XmlStreamReader(new File("pom.xml"))) {
            // do nothing
        }
        try (XmlStreamReader reader = XmlStreamReader.builder().setFile("pom.xml").setOpenOptions(StandardOpenOption.READ).get()) {
            // do nothing
        }
    }
250 
    /**
     * Tests that a reader can be created and closed for an input stream opened on {@code pom.xml}, once through
     * the {@link InputStream} constructor and once through the builder.
     *
     * @throws IOException if the stream or reader cannot be created or closed
     */
    @Test
    protected void testConstructorInputStreamInput() throws IOException {
        final Path path = Paths.get("pom.xml");
        try (XmlStreamReader reader = new XmlStreamReader(Files.newInputStream(path))) {
            // do nothing
        }
        try (@SuppressWarnings("resource")
        XmlStreamReader reader = XmlStreamReader.builder().setInputStream(Files.newInputStream(path)).get()) {
            // do nothing
        }
    }
262 
    /**
     * Tests that the {@link InputStream} constructor throws a {@link NullPointerException} for a {@code null}
     * stream.
     */
    @Test
    protected void testConstructorInputStreamInputNull() {
        assertThrows(NullPointerException.class, () -> new XmlStreamReader((InputStream) null));
    }
267 
    /**
     * Tests that a reader can be created and closed for the path {@code pom.xml}, once through the {@link Path}
     * constructor and once through the builder.
     *
     * @throws IOException if the reader cannot be created or closed
     */
    @Test
    protected void testConstructorPathInput() throws IOException {
        try (XmlStreamReader reader = new XmlStreamReader(Paths.get("pom.xml"))) {
            // do nothing
        }
        try (XmlStreamReader reader = XmlStreamReader.builder().setPath("pom.xml").get()) {
            // do nothing
        }
    }
277 
    /**
     * Tests that the {@link Path} constructor throws a {@link NullPointerException} for a {@code null} path.
     */
    @Test
    protected void testConstructorPathInputNull() {
        assertThrows(NullPointerException.class, () -> new XmlStreamReader((Path) null));
    }
282 
    /**
     * Tests that a reader can be created and closed for a {@link URLConnection} with a default encoding of UTF-8.
     * <p>
     * The connection is opened on {@code https://www.apache.org/}, so this test needs network access.
     * </p>
     *
     * @throws IOException if the connection or reader cannot be created or closed
     */
    @Test
    protected void testConstructorURLConnectionInput() throws IOException {
        try (XmlStreamReader reader = new XmlStreamReader(new URL("https://www.apache.org/").openConnection(), UTF_8)) {
            // do nothing
        }
    }
289 
    /**
     * Tests that the {@link URLConnection} constructor throws a {@link NullPointerException} for a {@code null}
     * connection.
     */
    @Test
    protected void testConstructorURLConnectionInputNull() {
        assertThrows(NullPointerException.class, () -> new XmlStreamReader((URLConnection) null, US_ASCII));
    }
294 
    /**
     * Tests that a reader can be created and closed for a {@link URL}.
     * <p>
     * The URL is {@code https://www.apache.org/}, so this test needs network access.
     * </p>
     *
     * @throws IOException if the reader cannot be created or closed
     */
    @Test
    protected void testConstructorURLInput() throws IOException {
        try (XmlStreamReader reader = new XmlStreamReader(new URL("https://www.apache.org/"))) {
            // do nothing
        }
    }
301 
    /**
     * Tests that the {@link URL} constructor throws a {@link NullPointerException} for a {@code null} URL.
     */
    @Test
    protected void testConstructorURLInputNull() {
        assertThrows(NullPointerException.class, () -> new XmlStreamReader((URL) null));
    }
306 
307     // XML Stream generator
308 
309     @Test
310     public void testEncodingAttributeXML() throws Exception {
311         try (InputStream is = new ByteArrayInputStream(ENCODING_ATTRIBUTE_XML.getBytes(StandardCharsets.UTF_8));
312                 XmlStreamReader xmlReader = new XmlStreamReader(is, "", true)) {
313             assertEquals(xmlReader.getEncoding(), UTF_8);
314         }
315         try (InputStream is = new ByteArrayInputStream(ENCODING_ATTRIBUTE_XML.getBytes(StandardCharsets.UTF_8));
316                 // @formatter:off
317                 XmlStreamReader xmlReader = XmlStreamReader.builder()
318                     .setInputStream(is)
319                     .setHttpContentType("")
320                     .setLenient(true)
321                     .get()) {
322             // @formatter:on
323             assertEquals(xmlReader.getEncoding(), UTF_8);
324         }
325     }
326 
    /**
     * Runs the HTTP content-type scenarios.
     * <p>
     * Each call passes a content type, a BOM key, a stream encoding and a prolog encoding to one of the helpers
     * {@code testHttpValid}, {@code testHttpInvalid}, {@code testHttpLenient} or
     * {@code testAlternateDefaultEncoding}; the lenient and alternate-default helpers take one extra argument
     * (the expected encoding, respectively the default encoding).
     * </p>
     *
     * @throws Exception if a scenario cannot be set up
     */
    @Test
    public void testHttp() throws Exception {
        // niallp 2010-10-06 - remove following 2 tests - I reinstated
        // checks for non-UTF-16 encodings (18 tests) and these failed
        // _testHttpValid("application/xml", "no-bom", "US-ASCII", null);
        // _testHttpValid("application/xml", "UTF-8-bom", "US-ASCII", null);
        testHttpValid("application/xml", "UTF-8-bom", UTF_8, null);
        testHttpValid("application/xml", "UTF-8-bom", UTF_8, UTF_8);
        testHttpValid("application/xml;charset=UTF-8", "UTF-8-bom", UTF_8, null);
        testHttpValid("application/xml;charset=\"UTF-8\"", "UTF-8-bom", UTF_8, null);
        testHttpValid("application/xml;charset='UTF-8'", "UTF-8-bom", UTF_8, null);
        testHttpValid("application/xml;charset=UTF-8", "UTF-8-bom", UTF_8, UTF_8);
        testHttpValid("application/xml;charset=UTF-16", "UTF-16BE-bom", UTF_16BE, null);
        testHttpValid("application/xml;charset=UTF-16", "UTF-16BE-bom", UTF_16BE, UTF_16);
        testHttpValid("application/xml;charset=UTF-16", "UTF-16BE-bom", UTF_16BE, UTF_16BE);

        testHttpInvalid("application/xml;charset=UTF-16BE", "UTF-16BE-bom", UTF_16BE, null);
        testHttpInvalid("application/xml;charset=UTF-16BE", "UTF-16BE-bom", UTF_16BE, UTF_16);
        testHttpInvalid("application/xml;charset=UTF-16BE", "UTF-16BE-bom", UTF_16BE, UTF_16BE);

        testHttpInvalid("application/xml;charset=UTF-32BE", "UTF-32BE-bom", UTF_32BE, null);
        testHttpInvalid("application/xml;charset=UTF-32BE", "UTF-32BE-bom", UTF_32BE, UTF_32);
        testHttpInvalid("application/xml;charset=UTF-32BE", "UTF-32BE-bom", UTF_32BE, UTF_32BE);

        // NOTE(review): the UTF_16LE and UTF_32LE calls below pass a charset name as the BOM argument, not one
        // of the "...-bom" keys registered in BOMs; getXmlInputStream writes no BOM for an unregistered key.
        // Confirm this is intended before changing these arguments.
        testHttpInvalid("application/xml", "UTF-8-bom", US_ASCII, US_ASCII);
        testHttpInvalid("application/xml;charset=UTF-16", UTF_16LE, UTF_8, UTF_8);
        testHttpInvalid("application/xml;charset=UTF-16", "no-bom", UTF_16BE, UTF_16BE);
        testHttpInvalid("application/xml;charset=UTF-32", UTF_32LE, UTF_8, UTF_8);
        testHttpInvalid("application/xml;charset=UTF-32", "no-bom", UTF_32BE, UTF_32BE);

        testHttpValid("text/xml", "no-bom", US_ASCII, null);
        testHttpValid("text/xml;charset=UTF-8", "UTF-8-bom", UTF_8, UTF_8);
        testHttpValid("text/xml;charset=UTF-8", "UTF-8-bom", UTF_8, null);
        testHttpValid("text/xml;charset=UTF-16", "UTF-16BE-bom", UTF_16BE, null);
        testHttpValid("text/xml;charset=UTF-16", "UTF-16BE-bom", UTF_16BE, UTF_16);
        testHttpValid("text/xml;charset=UTF-16", "UTF-16BE-bom", UTF_16BE, UTF_16BE);
        testHttpValid("text/xml;charset=UTF-32", "UTF-32BE-bom", UTF_32BE, null);
        testHttpValid("text/xml;charset=UTF-32", "UTF-32BE-bom", UTF_32BE, UTF_32);
        testHttpValid("text/xml;charset=UTF-32", "UTF-32BE-bom", UTF_32BE, UTF_32BE);
        testHttpValid("text/xml", "UTF-8-bom", US_ASCII, null);

        testAlternateDefaultEncoding("application/xml", "UTF-8-bom", UTF_8, null, null);
        testAlternateDefaultEncoding("application/xml", "no-bom", US_ASCII, null, US_ASCII);
        testAlternateDefaultEncoding("application/xml", "UTF-8-bom", UTF_8, null, UTF_8);
        testAlternateDefaultEncoding("text/xml", "no-bom", US_ASCII, null, null);
        testAlternateDefaultEncoding("text/xml", "no-bom", US_ASCII, null, US_ASCII);
        testAlternateDefaultEncoding("text/xml", "no-bom", US_ASCII, null, UTF_8);

        testHttpInvalid("text/xml;charset=UTF-16BE", "UTF-16BE-bom", UTF_16BE, null);
        testHttpInvalid("text/xml;charset=UTF-16BE", "UTF-16BE-bom", UTF_16BE, UTF_16);
        testHttpInvalid("text/xml;charset=UTF-16BE", "UTF-16BE-bom", UTF_16BE, UTF_16BE);
        testHttpInvalid("text/xml;charset=UTF-16", "no-bom", UTF_16BE, UTF_16BE);
        testHttpInvalid("text/xml;charset=UTF-16", "no-bom", UTF_16BE, null);

        testHttpInvalid("text/xml;charset=UTF-32BE", "UTF-32BE-bom", UTF_32BE, null);
        testHttpInvalid("text/xml;charset=UTF-32BE", "UTF-32BE-bom", UTF_32BE, UTF_32);
        testHttpInvalid("text/xml;charset=UTF-32BE", "UTF-32BE-bom", UTF_32BE, UTF_32BE);
        testHttpInvalid("text/xml;charset=UTF-32", "no-bom", UTF_32BE, UTF_32BE);
        testHttpInvalid("text/xml;charset=UTF-32", "no-bom", UTF_32BE, null);

        testHttpLenient("text/xml", "no-bom", US_ASCII, null, US_ASCII);
        testHttpLenient("text/xml;charset=UTF-8", "UTF-8-bom", UTF_8, UTF_8, UTF_8);
        testHttpLenient("text/xml;charset=UTF-8", "UTF-8-bom", UTF_8, null, UTF_8);
        testHttpLenient("text/xml;charset=UTF-16", "UTF-16BE-bom", UTF_16BE, null, UTF_16BE);
        testHttpLenient("text/xml;charset=UTF-16", "UTF-16BE-bom", UTF_16BE, UTF_16, UTF_16);
        testHttpLenient("text/xml;charset=UTF-16", "UTF-16BE-bom", UTF_16BE, UTF_16BE, UTF_16BE);
        testHttpLenient("text/xml;charset=UTF-32", "UTF-32BE-bom", UTF_32BE, null, UTF_32BE);
        testHttpLenient("text/xml;charset=UTF-32", "UTF-32BE-bom", UTF_32BE, UTF_32, UTF_32);
        testHttpLenient("text/xml;charset=UTF-32", "UTF-32BE-bom", UTF_32BE, UTF_32BE, UTF_32BE);
        testHttpLenient("text/xml", "UTF-8-bom", US_ASCII, null, US_ASCII);

        testHttpLenient("text/xml;charset=UTF-16BE", "UTF-16BE-bom", UTF_16BE, null, UTF_16BE);
        testHttpLenient("text/xml;charset=UTF-16BE", "UTF-16BE-bom", UTF_16BE, UTF_16, UTF_16);
        testHttpLenient("text/xml;charset=UTF-16BE", "UTF-16BE-bom", UTF_16BE, UTF_16BE, UTF_16BE);
        testHttpLenient("text/xml;charset=UTF-16", "no-bom", UTF_16BE, UTF_16BE, UTF_16BE);
        testHttpLenient("text/xml;charset=UTF-16", "no-bom", UTF_16BE, null, UTF_16);

        testHttpLenient("text/xml;charset=UTF-32BE", "UTF-32BE-bom", UTF_32BE, null, UTF_32BE);
        testHttpLenient("text/xml;charset=UTF-32BE", "UTF-32BE-bom", UTF_32BE, UTF_32, UTF_32);
        testHttpLenient("text/xml;charset=UTF-32BE", "UTF-32BE-bom", UTF_32BE, UTF_32BE, UTF_32BE);
        testHttpLenient("text/xml;charset=UTF-32", "no-bom", UTF_32BE, UTF_32BE, UTF_32BE);
        testHttpLenient("text/xml;charset=UTF-32", "no-bom", UTF_32BE, null, UTF_32);

        testHttpLenient("text/html", "no-bom", US_ASCII, US_ASCII, US_ASCII);
        testHttpLenient("text/html", "no-bom", US_ASCII, null, US_ASCII);
        testHttpLenient("text/html;charset=UTF-8", "no-bom", US_ASCII, UTF_8, UTF_8);
        testHttpLenient("text/html;charset=UTF-16BE", "no-bom", US_ASCII, UTF_8, UTF_8);
        testHttpLenient("text/html;charset=UTF-32BE", "no-bom", US_ASCII, UTF_8, UTF_8);
    }
416 
417     @Test
418     public void testHttpContent() throws Exception {
419         final String encoding = UTF_8;
420         final String xml = getXML("no-bom", XML3, encoding, encoding);
421         try (XmlStreamReader xmlReader = new XmlStreamReader(CharSequenceInputStream.builder().setCharSequence(xml).setCharset(encoding).get())) {
422             assertEquals(xmlReader.getEncoding(), encoding, "Check encoding");
423             assertEquals(xml, IOUtils.toString(xmlReader), "Check content");
424         }
425     }
426 
427     protected void testHttpInvalid(final String cT, final String bomEnc, final String streamEnc,
428         final String prologEnc) throws Exception {
429         try (InputStream is = getXmlInputStream(bomEnc, prologEnc == null ? XML2 : XML3, streamEnc, prologEnc)) {
430             try {
431                 new XmlStreamReader(is, cT, false).close();
432                 fail("It should have failed for HTTP Content-type " + cT + ", BOM " + bomEnc + ", streamEnc " + streamEnc + " and prologEnc " + prologEnc);
433             } catch (final IOException ex) {
434                 assertTrue(ex.getMessage().contains("Illegal encoding,"));
435             }
436         }
437     }
438 
439     protected void testHttpLenient(final String cT, final String bomEnc, final String streamEnc,
440         final String prologEnc, final String shouldBe) throws Exception {
441         try (InputStream is = getXmlInputStream(bomEnc, prologEnc == null ? XML2 : XML3, streamEnc, prologEnc);
442             XmlStreamReader xmlReader = new XmlStreamReader(is, cT, true)) {
443             assertEquals(xmlReader.getEncoding(), shouldBe);
444         }
445     }
446 
447     public void testHttpValid(final String cT, final String bomEnc, final String streamEnc,
448         final String prologEnc) throws Exception {
449         try (InputStream is = getXmlInputStream(bomEnc, prologEnc == null ? XML1 : XML3, streamEnc, prologEnc);
450             XmlStreamReader xmlReader = new XmlStreamReader(is, cT, false)) {
451             if (!streamEnc.equals(UTF_16)) {
452                 // we can not assert things here because UTF-8, US-ASCII and
453                 // ISO-8859-1 look alike for the chars used for detection
454                 // (niallp 2010-10-06 - I re-instated the check below and removed the 2 tests that failed)
455                 assertEquals(xmlReader.getEncoding(), streamEnc);
456             } else {
457                 assertEquals(xmlReader.getEncoding().substring(0, streamEnc.length()), streamEnc);
458             }
459         }
460     }
461 
462     @ParameterizedTest(name = "{0}")
463     @MethodSource(CharsetsTest.AVAIL_CHARSETS)
464     public void testIO_815(final String csName) throws Exception {
465         final MessageFormat fmt = new MessageFormat("<?xml version=\"1.0\" encoding=''{0}''?>\n<root>text</root>");
466         final IOFunction<InputStream, XmlStreamReader> factoryCtor = XmlStreamReader::new;
467         final IOFunction<InputStream, XmlStreamReader> factoryBuilder = stream -> XmlStreamReader.builder().setInputStream(stream).get();
468         parseCharset(fmt.format(new Object[] { csName }), csName, factoryCtor);
469         parseCharset(fmt.format(new Object[] { csName }), csName, factoryBuilder);
470         for (final String alias : Charset.forName(csName).aliases()) {
471             parseCharset(fmt.format(new Object[] { alias }), alias, factoryCtor);
472             parseCharset(fmt.format(new Object[] { alias }), alias, factoryBuilder);
473         }
474     }
475 
476     // Turkish language has specific rules to convert dotted and dotless i character.
477     @Test
478     @DefaultLocale(language = "tr")
479     public void testLowerCaseEncodingWithTurkishLocale_IO_557() throws Exception {
480         final String[] encodings = { "iso8859-1", "us-ascii", "utf-8" }; // lower-case
481         for (final String encoding : encodings) {
482             final String xml = getXML("no-bom", XML3, encoding, encoding);
483             try (ByteArrayInputStream is = new ByteArrayInputStream(xml.getBytes(encoding));
484                     XmlStreamReader xmlReader = new XmlStreamReader(is)) {
485                 assertTrue(encoding.equalsIgnoreCase(xmlReader.getEncoding()), "Check encoding : " + encoding);
486                 assertEquals(xml, IOUtils.toString(xmlReader), "Check content");
487             }
488         }
489     }
490 
491     @SuppressWarnings("resource")
492     protected void testRawBomInvalid(final String bomEnc, final String streamEnc,
493         final String prologEnc) throws Exception {
494         final InputStream is = getXmlInputStream(bomEnc, XML3, streamEnc, prologEnc);
495         XmlStreamReader xmlReader = null;
496         try {
497             xmlReader = XmlStreamReader.builder().setInputStream(is).setLenient(false).get();
498             final String foundEnc = xmlReader.getEncoding();
499             fail("Expected IOException for BOM " + bomEnc + ", streamEnc " + streamEnc + " and prologEnc " + prologEnc
500                 + ": found " + foundEnc);
501         } catch (final IOException ex) {
502             assertTrue(ex.getMessage().contains("Illegal encoding,"));
503         }
504         if (xmlReader != null) {
505             xmlReader.close();
506         }
507     }
508 
    /**
     * Runs the raw (no content type) BOM scenarios for the UTF-16 family: three valid encodings, then three
     * combinations where the prolog encoding differs from the BOM and stream encoding.
     *
     * @throws Exception if a scenario cannot be set up
     */
    @Test
    public void testRawBomUtf16() throws Exception {
        testRawBomValid(UTF_16BE);
        testRawBomValid(UTF_16LE);
        testRawBomValid(UTF_16);

        testRawBomInvalid("UTF-16BE-bom", UTF_16BE, UTF_16LE);
        testRawBomInvalid("UTF-16LE-bom", UTF_16LE, UTF_16BE);
        testRawBomInvalid("UTF-16LE-bom", UTF_16LE, UTF_8);
    }
519 
    /**
     * Runs the raw (no content type) BOM scenarios for the UTF-32 family: three valid encodings, then three
     * combinations where the prolog encoding differs from the BOM and stream encoding.
     *
     * @throws Exception if a scenario cannot be set up
     */
    @Test
    public void testRawBomUtf32() throws Exception {
        testRawBomValid(UTF_32BE);
        testRawBomValid(UTF_32LE);
        testRawBomValid(UTF_32);

        testRawBomInvalid("UTF-32BE-bom", UTF_32BE, UTF_32LE);
        testRawBomInvalid("UTF-32LE-bom", UTF_32LE, UTF_32BE);
        testRawBomInvalid("UTF-32LE-bom", UTF_32LE, UTF_8);
    }
530 
    /**
     * Runs the raw (no content type) BOM scenarios for UTF-8: one valid case, then combinations of a UTF-8 BOM
     * with other stream or prolog encodings that must be rejected.
     *
     * @throws Exception if a scenario cannot be set up
     */
    @Test
    public void testRawBomUtf8() throws Exception {
        testRawBomValid(UTF_8);
        testRawBomInvalid("UTF-8-bom", US_ASCII, US_ASCII);
        testRawBomInvalid("UTF-8-bom", ISO_8859_1, ISO_8859_1);
        testRawBomInvalid("UTF-8-bom", UTF_8, UTF_16);
        testRawBomInvalid("UTF-8-bom", UTF_8, UTF_16BE);
        testRawBomInvalid("UTF-8-bom", UTF_8, UTF_16LE);
        // NOTE(review): the six calls below repeat the invalid cases of testRawBomUtf16 and testRawBomUtf32.
        testRawBomInvalid("UTF-16BE-bom", UTF_16BE, UTF_16LE);
        testRawBomInvalid("UTF-16LE-bom", UTF_16LE, UTF_16BE);
        testRawBomInvalid("UTF-16LE-bom", UTF_16LE, UTF_8);
        testRawBomInvalid("UTF-32BE-bom", UTF_32BE, UTF_32LE);
        testRawBomInvalid("UTF-32LE-bom", UTF_32LE, UTF_32BE);
        testRawBomInvalid("UTF-32LE-bom", UTF_32LE, UTF_8);
    }
546 
547     protected void testRawBomValid(final String encoding) throws Exception {
548         try (InputStream is = getXmlInputStream(encoding + "-bom", XML3, encoding, encoding);
549             XmlStreamReader xmlReader = new XmlStreamReader(is, false)) {
550             if (!encoding.equals(UTF_16) && !encoding.equals(UTF_32)) {
551                 assertEquals(xmlReader.getEncoding(), encoding);
552             } else {
553                 assertEquals(xmlReader.getEncoding().substring(0, encoding.length()), encoding);
554             }
555         }
556     }
557 
558     @Test
559     public void testRawContent() throws Exception {
560         final String encoding = UTF_8;
561         final String xml = getXML("no-bom", XML3, encoding, encoding);
562         try (XmlStreamReader xmlReader = new XmlStreamReader(CharSequenceInputStream.builder().setCharSequence(xml).setCharset(encoding).get())) {
563             assertEquals(xmlReader.getEncoding(), encoding, "Check encoding");
564             assertEquals(xml, IOUtils.toString(xmlReader), "Check content");
565         }
566     }
567 
    /**
     * Runs the no-BOM scenarios of {@code testRawNoBomValid} for the encoding {@code CP1047}.
     *
     * @throws Exception if a scenario cannot be set up
     */
    @Test
    public void testRawNoBomCp1047() throws Exception {
        testRawNoBomValid("CP1047");
    }
572 
573     protected void testRawNoBomInvalid(final String encoding) throws Exception {
574         try (final InputStream is = getXmlInputStream("no-bom", XML3, encoding, encoding)) {
575             final XmlStreamReader xmlStreamReader = new XmlStreamReader(is, false);
576             final IOException ex = assertThrows(IOException.class, xmlStreamReader::close);
577             assertTrue(ex.getMessage().contains("Invalid encoding,"));
578         }
579     }
580 
    /**
     * Runs the no-BOM scenarios of {@code testRawNoBomValid} for ISO-8859-1.
     *
     * @throws Exception if a scenario cannot be set up
     */
    @Test
    public void testRawNoBomIso8859_1() throws Exception {
        testRawNoBomValid(ISO_8859_1);
    }
585 
    /**
     * Runs the no-BOM scenarios of {@code testRawNoBomValid} for US-ASCII.
     *
     * @throws Exception if a scenario cannot be set up
     */
    @Test
    public void testRawNoBomUsAscii() throws Exception {
        testRawNoBomValid(US_ASCII);
    }
590 
    /**
     * Runs the no-BOM scenarios of {@code testRawNoBomValid} for UTF-16BE.
     *
     * @throws Exception if a scenario cannot be set up
     */
    @Test
    public void testRawNoBomUtf16BE() throws Exception {
        testRawNoBomValid(UTF_16BE);
    }
595 
    /**
     * Runs the no-BOM scenarios of {@code testRawNoBomValid} for UTF-16LE.
     *
     * @throws Exception if a scenario cannot be set up
     */
    @Test
    public void testRawNoBomUtf16LE() throws Exception {
        testRawNoBomValid(UTF_16LE);
    }
600 
    /**
     * Runs the no-BOM scenarios of {@code testRawNoBomValid} for UTF-32BE.
     *
     * @throws Exception if a scenario cannot be set up
     */
    @Test
    public void testRawNoBomUtf32BE() throws Exception {
        testRawNoBomValid(UTF_32BE);
    }
605 
    /**
     * Runs the no-BOM scenarios of {@code testRawNoBomValid} for UTF-32LE.
     *
     * @throws Exception if a scenario cannot be set up
     */
    @Test
    public void testRawNoBomUtf32LE() throws Exception {
        testRawNoBomValid(UTF_32LE);
    }
610 
    /**
     * Runs the no-BOM scenarios of {@code testRawNoBomValid} for UTF-8.
     *
     * @throws Exception if a scenario cannot be set up
     */
    @Test
    public void testRawNoBomUtf8() throws Exception {
        testRawNoBomValid(UTF_8);
    }
615 
616     protected void testRawNoBomValid(final String encoding) throws Exception {
617         InputStream is = getXmlInputStream("no-bom", XML1, encoding, encoding);
618         XmlStreamReader xmlReader = new XmlStreamReader(is, false);
619         assertEquals(xmlReader.getEncoding(), UTF_8);
620         xmlReader.close();
621 
622         is = getXmlInputStream("no-bom", XML2, encoding, encoding);
623         xmlReader = new XmlStreamReader(is);
624         assertEquals(xmlReader.getEncoding(), UTF_8);
625         xmlReader.close();
626 
627         is = getXmlInputStream("no-bom", XML3, encoding, encoding);
628         xmlReader = new XmlStreamReader(is);
629         assertEquals(xmlReader.getEncoding(), encoding);
630         xmlReader.close();
631 
632         is = getXmlInputStream("no-bom", XML4, encoding, encoding);
633         xmlReader = new XmlStreamReader(is);
634         assertEquals(xmlReader.getEncoding(), encoding);
635         xmlReader.close();
636 
637         is = getXmlInputStream("no-bom", XML5, encoding, encoding);
638         xmlReader = new XmlStreamReader(is);
639         assertEquals(xmlReader.getEncoding(), encoding);
640         xmlReader.close();
641 
642         is = getXmlInputStream("no-bom", XML6, encoding, encoding);
643         xmlReader = new XmlStreamReader(is);
644         assertEquals(xmlReader.getEncoding(), encoding);
645         xmlReader.close();
646 
647         is = getXmlInputStream("no-bom", XML7, encoding, encoding);
648         xmlReader = new XmlStreamReader(is);
649         assertEquals(xmlReader.getEncoding(), encoding);
650         xmlReader.close();
651 }
652 }