View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.io.input;
18  
19  import static org.junit.Assert.*;
20  
21  import java.io.ByteArrayInputStream;
22  import java.io.IOException;
23  import java.io.InputStream;
24  import java.io.Reader;
25  import java.nio.charset.Charset;
26  import java.nio.charset.StandardCharsets;
27  
28  import org.apache.commons.io.ByteOrderMark;
29  import org.junit.Assert;
30  import org.junit.Assume;
31  import org.junit.Test;
32  import org.w3c.dom.Document;
33  import org.xml.sax.InputSource;
34  import org.xml.sax.SAXException;
35  import org.xml.sax.SAXParseException;
36  
37  import javax.xml.parsers.DocumentBuilder;
38  import javax.xml.parsers.DocumentBuilderFactory;
39  import javax.xml.parsers.ParserConfigurationException;
40  
41  /**
42   * Test case for {@link BOMInputStream}.
43   *
44   */
45  @SuppressWarnings("ResultOfMethodCallIgnored")
46  public class BOMInputStreamTest {
47      //----------------------------------------------------------------------------
48      //  Support code
49      //----------------------------------------------------------------------------
50  
51      /**
52       *  A mock InputStream that expects <code>close()</code> to be called.
53       */
54      private static class ExpectCloseInputStream extends InputStream {
55          private boolean _closeCalled;
56  
57          public void assertCloseCalled() {
58              assertTrue(_closeCalled);
59          }
60  
61          @Override
62          public void close() throws IOException {
63              _closeCalled = true;
64          }
65  
66          @Override
67          public int read() throws IOException {
68              return -1;
69          }
70      }
71  
72      private void assertData(final byte[] expected, final byte[] actual, final int len)
73          throws Exception {
74          assertEquals("length", expected.length, len);
75          for (int ii = 0; ii < expected.length; ii++) {
76              assertEquals("byte " + ii, expected[ii], actual[ii]);
77          }
78      }
79  
80      /**
81       *  Creates the underlying data stream, with or without BOM.
82       */
83      private InputStream createUtf16BeDataStream(final byte[] baseData, final boolean addBOM) {
84          byte[] data = baseData;
85          if (addBOM) {
86              data = new byte[baseData.length + 2];
87              data[0] = (byte) 0xFE;
88              data[1] = (byte) 0xFF;
89              System.arraycopy(baseData, 0, data, 2, baseData.length);
90          }
91          return new ByteArrayInputStream(data);
92      }
93  
94      /**
95       *  Creates the underlying data stream, with or without BOM.
96       */
97      private InputStream createUtf16LeDataStream(final byte[] baseData, final boolean addBOM) {
98          byte[] data = baseData;
99          if (addBOM) {
100             data = new byte[baseData.length + 2];
101             data[0] = (byte) 0xFF;
102             data[1] = (byte) 0xFE;
103             System.arraycopy(baseData, 0, data, 2, baseData.length);
104         }
105         return new ByteArrayInputStream(data);
106     }
107 
108     /**
109      *  Creates the underlying data stream, with or without BOM.
110      */
111     private InputStream createUtf32BeDataStream(final byte[] baseData, final boolean addBOM) {
112         byte[] data = baseData;
113         if (addBOM) {
114             data = new byte[baseData.length + 4];
115             data[0] = 0;
116             data[1] = 0;
117             data[2] = (byte) 0xFE;
118             data[3] = (byte) 0xFF;
119             System.arraycopy(baseData, 0, data, 4, baseData.length);
120         }
121         return new ByteArrayInputStream(data);
122     }
123 
124     /**
125      *  Creates the underlying data stream, with or without BOM.
126      */
127     private InputStream createUtf32LeDataStream(final byte[] baseData, final boolean addBOM) {
128         byte[] data = baseData;
129         if (addBOM) {
130             data = new byte[baseData.length + 4];
131             data[0] = (byte) 0xFF;
132             data[1] = (byte) 0xFE;
133             data[2] = 0;
134             data[3] = 0;
135             System.arraycopy(baseData, 0, data, 4, baseData.length);
136         }
137         return new ByteArrayInputStream(data);
138     }
139 
140     /**
141      *  Creates the underlying data stream, with or without BOM.
142      */
143     private InputStream createUtf8DataStream(final byte[] baseData, final boolean addBOM) {
144         byte[] data = baseData;
145         if (addBOM) {
146             data = new byte[baseData.length + 3];
147             data[0] = (byte) 0xEF;
148             data[1] = (byte) 0xBB;
149             data[2] = (byte) 0xBF;
150             System.arraycopy(baseData, 0, data, 3, baseData.length);
151         }
152         return new ByteArrayInputStream(data);
153     }
154 
155     //----------------------------------------------------------------------------
156     //  Test cases
157     //----------------------------------------------------------------------------
158 
159     private void parseXml(final InputStream in) throws SAXException, IOException, ParserConfigurationException {
160         final Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(new InputSource(in));
161         assertNotNull(doc);
162         assertEquals("X", doc.getFirstChild().getNodeName());
163     }
164 
165     private void parseXml(final Reader in) throws SAXException, IOException, ParserConfigurationException {
166         final Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(new InputSource(in));
167         assertNotNull(doc);
168         assertEquals("X", doc.getFirstChild().getNodeName());
169     }
170 
171     private void readBOMInputStreamTwice(final String resource) throws Exception {
172         final InputStream inputStream = this.getClass().getResourceAsStream(resource);
173         Assert.assertNotNull(inputStream);
174         final BOMInputStream bomInputStream = new BOMInputStream(inputStream);
175         bomInputStream.mark(1000000);
176 
177         this.readFile(bomInputStream);
178         bomInputStream.reset();
179         this.readFile(bomInputStream);
180         inputStream.close();
181         bomInputStream.close();
182     }
183 
184     private void readFile(final BOMInputStream bomInputStream) throws Exception {
185         int bytes;
186         final byte[] bytesFromStream = new byte[100];
187         do {
188             bytes = bomInputStream.read(bytesFromStream);
189         } while (bytes > 0);
190     }
191 
192     @Test
193     public void testAvailableWithBOM() throws Exception {
194         final byte[] data = new byte[] { 'A', 'B', 'C', 'D' };
195         final InputStream in = new BOMInputStream(createUtf8DataStream(data, true));
196         assertEquals(7, in.available());
197         in.close();
198     }
199 
200     @Test
201     public void testAvailableWithoutBOM() throws Exception {
202         final byte[] data = new byte[] { 'A', 'B', 'C', 'D' };
203         final InputStream in = new BOMInputStream(createUtf8DataStream(data, false));
204         assertEquals(4, in.available());
205         in.close();
206     }
207 
208     @Test
209     // this is here for coverage
210     public void testClose() throws Exception {
211         final ExpectCloseInputStream del = new ExpectCloseInputStream();
212         final InputStream in = new BOMInputStream(del);
213 
214         in.close();
215         del.assertCloseCalled();
216         del.close();
217     }
218 
219     @Test
220     public void testEmptyBufferWithBOM() throws Exception {
221         final byte[] data = new byte[] {};
222         final InputStream in = new BOMInputStream(createUtf8DataStream(data, true));
223         final byte[] buf = new byte[1024];
224         assertEquals(-1, in.read(buf));
225         in.close();
226     }
227 
228     @Test
229     public void testEmptyBufferWithoutBOM() throws Exception {
230         final byte[] data = new byte[] {};
231         final InputStream in = new BOMInputStream(createUtf8DataStream(data, false));
232         final byte[] buf = new byte[1024];
233         assertEquals(-1, in.read(buf));
234         in.close();
235     }
236 
237     @Test
238     public void testGetBOMFirstThenRead() throws Exception {
239         final byte[] data = new byte[] { 'A', 'B', 'C' };
240         final BOMInputStream in = new BOMInputStream(createUtf8DataStream(data, true));
241         assertEquals("getBOM", ByteOrderMark.UTF_8, in.getBOM());
242         assertTrue("hasBOM()", in.hasBOM());
243         assertTrue("hasBOM(UTF-8)", in.hasBOM(ByteOrderMark.UTF_8));
244         assertEquals('A', in.read());
245         assertEquals('B', in.read());
246         assertEquals('C', in.read());
247         assertEquals(-1, in.read());
248         in.close();
249     }
250 
251     @Test
252     public void testGetBOMFirstThenReadInclude() throws Exception {
253         final byte[] data = new byte[] { 'A', 'B', 'C' };
254         final BOMInputStream in = new BOMInputStream(createUtf8DataStream(data, true), true);
255         assertTrue("hasBOM()", in.hasBOM());
256         assertTrue("hasBOM(UTF-8)", in.hasBOM(ByteOrderMark.UTF_8));
257         assertEquals("getBOM", ByteOrderMark.UTF_8, in.getBOM());
258         assertEquals(0xEF, in.read());
259         assertEquals(0xBB, in.read());
260         assertEquals(0xBF, in.read());
261         assertEquals('A', in.read());
262         assertEquals('B', in.read());
263         assertEquals('C', in.read());
264         assertEquals(-1, in.read());
265         in.close();
266     }
267 
268     @Test
269     public void testLargeBufferWithBOM() throws Exception {
270         final byte[] data = new byte[] { 'A', 'B', 'C' };
271         final InputStream in = new BOMInputStream(createUtf8DataStream(data, true));
272         final byte[] buf = new byte[1024];
273         assertData(data, buf, in.read(buf));
274         in.close();
275     }
276 
277     @Test
278     public void testLargeBufferWithoutBOM() throws Exception {
279         final byte[] data = new byte[] { 'A', 'B', 'C' };
280         final InputStream in = new BOMInputStream(createUtf8DataStream(data, false));
281         final byte[] buf = new byte[1024];
282         assertData(data, buf, in.read(buf));
283         in.close();
284     }
285 
286     @Test
287     public void testLeadingNonBOMBufferedRead() throws Exception {
288         final byte[] data = new byte[] { (byte) 0xEF, (byte) 0xAB, (byte) 0xCD };
289         final InputStream in = new BOMInputStream(createUtf8DataStream(data, false));
290         final byte[] buf = new byte[1024];
291         assertData(data, buf, in.read(buf));
292         in.close();
293     }
294 
295     @Test
296     public void testLeadingNonBOMSingleRead() throws Exception {
297         final byte[] data = new byte[] { (byte) 0xEF, (byte) 0xAB, (byte) 0xCD };
298         final InputStream in = new BOMInputStream(createUtf8DataStream(data, false));
299         assertEquals(0xEF, in.read());
300         assertEquals(0xAB, in.read());
301         assertEquals(0xCD, in.read());
302         assertEquals(-1, in.read());
303         in.close();
304     }
305 
306     @Test
307     public void testMarkResetAfterReadWithBOM() throws Exception {
308         final byte[] data = new byte[] { 'A', 'B', 'C', 'D' };
309         final InputStream in = new BOMInputStream(createUtf8DataStream(data, true));
310         assertTrue(in.markSupported());
311 
312         in.read();
313         in.mark(10);
314 
315         in.read();
316         in.read();
317         in.reset();
318         assertEquals('B', in.read());
319         in.close();
320     }
321 
322     @Test
323     public void testMarkResetAfterReadWithoutBOM() throws Exception {
324         final byte[] data = new byte[] { 'A', 'B', 'C', 'D' };
325         final InputStream in = new BOMInputStream(createUtf8DataStream(data, false));
326         assertTrue(in.markSupported());
327 
328         in.read();
329         in.mark(10);
330 
331         in.read();
332         in.read();
333         in.reset();
334         assertEquals('B', in.read());
335         in.close();
336     }
337 
338     @Test
339     public void testMarkResetBeforeReadWithBOM() throws Exception {
340         final byte[] data = new byte[] { 'A', 'B', 'C', 'D' };
341         final InputStream in = new BOMInputStream(createUtf8DataStream(data, true));
342         assertTrue(in.markSupported());
343 
344         in.mark(10);
345 
346         in.read();
347         in.read();
348         in.reset();
349         assertEquals('A', in.read());
350         in.close();
351     }
352 
353     @Test
354     public void testMarkResetBeforeReadWithoutBOM() throws Exception {
355         final byte[] data = new byte[] { 'A', 'B', 'C', 'D' };
356         final InputStream in = new BOMInputStream(createUtf8DataStream(data, false));
357         assertTrue(in.markSupported());
358 
359         in.mark(10);
360 
361         in.read();
362         in.read();
363         in.reset();
364         assertEquals('A', in.read());
365         in.close();
366     }
367 
368     @Test
369     public void testNoBoms() throws Exception {
370         final byte[] data = new byte[] { 'A', 'B', 'C' };
371         try {
372             (new BOMInputStream(createUtf8DataStream(data, true), false, (ByteOrderMark[])null)).close();
373             fail("Null BOMs, expected IllegalArgumentException");
374         } catch (final IllegalArgumentException e) {
375             // expected
376         }
377         try {
378             (new BOMInputStream(createUtf8DataStream(data, true), false, new ByteOrderMark[0])).close();
379             fail("Null BOMs, expected IllegalArgumentException");
380         } catch (final IllegalArgumentException e) {
381             // expected
382         }
383     }
384 
385 
386 
387 
388 
389     @Test
390     public void testReadEmpty() throws Exception {
391         final byte[] data = new byte[] {};
392         final BOMInputStream in = new BOMInputStream(createUtf8DataStream(data, false));
393         assertEquals(-1, in.read());
394         assertFalse("hasBOM()", in.hasBOM());
395         assertFalse("hasBOM(UTF-8)", in.hasBOM(ByteOrderMark.UTF_8));
396         assertNull("getBOM", in.getBOM());
397         in.close();
398     }
399 
400     @Test
401     public void testReadSmall() throws Exception {
402         final byte[] data = new byte[] { 'A', 'B' };
403         final BOMInputStream in = new BOMInputStream(createUtf8DataStream(data, false));
404         assertEquals('A', in.read());
405         assertEquals('B', in.read());
406         assertEquals(-1, in.read());
407         assertFalse("hasBOM()", in.hasBOM());
408         assertFalse("hasBOM(UTF-8)", in.hasBOM(ByteOrderMark.UTF_8));
409         assertNull("getBOM", in.getBOM());
410         in.close();
411     }
412 
413     @Test
414     public void testReadTwiceWithBOM() throws Exception {
415         this.readBOMInputStreamTwice("/org/apache/commons/io/testfileBOM.xml");
416     }
417 
418     @Test
419     public void testReadTwiceWithoutBOM() throws Exception {
420         this.readBOMInputStreamTwice("/org/apache/commons/io/testfileNoBOM.xml");
421     }
422 
423     @Test
424     public void testReadWithBOMInclude() throws Exception {
425         final byte[] data = new byte[] { 'A', 'B', 'C' };
426         final BOMInputStream in = new BOMInputStream(createUtf8DataStream(data, true), true);
427         assertEquals(0xEF, in.read());
428         assertEquals(0xBB, in.read());
429         assertEquals(0xBF, in.read());
430         assertEquals('A', in.read());
431         assertEquals('B', in.read());
432         assertEquals('C', in.read());
433         assertEquals(-1, in.read());
434         assertTrue("hasBOM()", in.hasBOM());
435         assertTrue("hasBOM(UTF-8)", in.hasBOM(ByteOrderMark.UTF_8));
436         assertEquals("getBOM", ByteOrderMark.UTF_8, in.getBOM());
437         in.close();
438     }
439 
440     @Test
441     public void testReadWithBOMUtf16Be() throws Exception {
442         final byte[] data = "ABC".getBytes(StandardCharsets.UTF_16BE);
443         final BOMInputStream in = new BOMInputStream(createUtf16BeDataStream(data, true), ByteOrderMark.UTF_16BE);
444         assertEquals(0, in.read());
445         assertEquals('A', in.read());
446         assertEquals(0, in.read());
447         assertEquals('B', in.read());
448         assertEquals(0, in.read());
449         assertEquals('C', in.read());
450         assertEquals(-1, in.read());
451         assertTrue("hasBOM()", in.hasBOM());
452         assertTrue("hasBOM(UTF-16BE)", in.hasBOM(ByteOrderMark.UTF_16BE));
453         assertEquals("getBOM", ByteOrderMark.UTF_16BE, in.getBOM());
454         try {
455             in.hasBOM(ByteOrderMark.UTF_16LE);
456             fail("Expected IllegalArgumentException");
457         } catch (final IllegalArgumentException e) {
458             // expected - not configured for UTF-16LE
459         }
460         in.close();
461     }
462 
463     @Test
464     public void testReadWithBOMUtf16Le() throws Exception {
465         final byte[] data = "ABC".getBytes(StandardCharsets.UTF_16LE);
466         final BOMInputStream in = new BOMInputStream(createUtf16LeDataStream(data, true), ByteOrderMark.UTF_16LE);
467         assertEquals('A', in.read());
468         assertEquals(0, in.read());
469         assertEquals('B', in.read());
470         assertEquals(0, in.read());
471         assertEquals('C', in.read());
472         assertEquals(0, in.read());
473         assertEquals(-1, in.read());
474         assertTrue("hasBOM()", in.hasBOM());
475         assertTrue("hasBOM(UTF-16LE)", in.hasBOM(ByteOrderMark.UTF_16LE));
476         assertEquals("getBOM", ByteOrderMark.UTF_16LE, in.getBOM());
477         try {
478             in.hasBOM(ByteOrderMark.UTF_16BE);
479             fail("Expected IllegalArgumentException");
480         } catch (final IllegalArgumentException e) {
481             // expected - not configured for UTF-16BE
482         }
483         in.close();
484     }
485 
486     @Test
487     public void testReadWithBOMUtf32Be() throws Exception {
488         Assume.assumeTrue(Charset.isSupported("UTF_32BE"));
489         final byte[] data = "ABC".getBytes("UTF_32BE");
490         final BOMInputStream in = new BOMInputStream(createUtf32BeDataStream(data, true), ByteOrderMark.UTF_32BE);
491         assertEquals(0, in.read());
492         assertEquals(0, in.read());
493         assertEquals(0, in.read());
494         assertEquals('A', in.read());
495         assertEquals(0, in.read());
496         assertEquals(0, in.read());
497         assertEquals(0, in.read());
498         assertEquals('B', in.read());
499         assertEquals(0, in.read());
500         assertEquals(0, in.read());
501         assertEquals(0, in.read());
502         assertEquals('C', in.read());
503         assertEquals(-1, in.read());
504         assertTrue("hasBOM()", in.hasBOM());
505         assertTrue("hasBOM(UTF-32BE)", in.hasBOM(ByteOrderMark.UTF_32BE));
506         assertEquals("getBOM", ByteOrderMark.UTF_32BE, in.getBOM());
507         try {
508             in.hasBOM(ByteOrderMark.UTF_32LE);
509             fail("Expected IllegalArgumentException");
510         } catch (final IllegalArgumentException e) {
511             // expected - not configured for UTF-32LE
512         }
513         in.close();
514     }
515 
516     @Test
517     public void testReadWithBOMUtf32Le() throws Exception {
518         Assume.assumeTrue(Charset.isSupported("UTF_32LE"));
519         final byte[] data = "ABC".getBytes("UTF_32LE");
520         final BOMInputStream in = new BOMInputStream(createUtf32LeDataStream(data, true), ByteOrderMark.UTF_32LE);
521         assertEquals('A', in.read());
522         assertEquals(0, in.read());
523         assertEquals(0, in.read());
524         assertEquals(0, in.read());
525         assertEquals('B', in.read());
526         assertEquals(0, in.read());
527         assertEquals(0, in.read());
528         assertEquals(0, in.read());
529         assertEquals('C', in.read());
530         assertEquals(0, in.read());
531         assertEquals(0, in.read());
532         assertEquals(0, in.read());
533         assertEquals(-1, in.read());
534         assertTrue("hasBOM()", in.hasBOM());
535         assertTrue("hasBOM(UTF-32LE)", in.hasBOM(ByteOrderMark.UTF_32LE));
536         assertEquals("getBOM", ByteOrderMark.UTF_32LE, in.getBOM());
537         try {
538             in.hasBOM(ByteOrderMark.UTF_32BE);
539             fail("Expected IllegalArgumentException");
540         } catch (final IllegalArgumentException e) {
541             // expected - not configured for UTF-32BE
542         }
543         in.close();
544     }
545 
546     @Test
547     public void testReadWithBOMUtf8() throws Exception {
548         final byte[] data = "ABC".getBytes(StandardCharsets.UTF_8);
549         final BOMInputStream in = new BOMInputStream(createUtf8DataStream(data, true), ByteOrderMark.UTF_8);
550         assertEquals('A', in.read());
551         assertEquals('B', in.read());
552         assertEquals('C', in.read());
553         assertEquals(-1, in.read());
554         assertTrue("hasBOM()", in.hasBOM());
555         assertTrue("hasBOM(UTF-8)", in.hasBOM(ByteOrderMark.UTF_8));
556         assertEquals("getBOM", ByteOrderMark.UTF_8, in.getBOM());
557         try {
558             in.hasBOM(ByteOrderMark.UTF_16BE);
559             fail("Expected IllegalArgumentException");
560         } catch (final IllegalArgumentException e) {
561             // expected - not configured for UTF-16BE
562         }
563         in.close();
564     }
565 
566     @Test
567     public void testReadWithMultipleBOM() throws Exception {
568         final byte[] data = new byte[] { 'A', 'B', 'C' };
569         final BOMInputStream in = new BOMInputStream(createUtf8DataStream(data, true),
570                                             ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_8);
571         assertEquals('A', in.read());
572         assertEquals('B', in.read());
573         assertEquals('C', in.read());
574         assertEquals(-1, in.read());
575         assertTrue("hasBOM()", in.hasBOM());
576         assertTrue("hasBOM(UTF-8)", in.hasBOM(ByteOrderMark.UTF_8));
577         assertFalse("hasBOM(UTF-16BE)", in.hasBOM(ByteOrderMark.UTF_16BE));
578         assertEquals("getBOM", ByteOrderMark.UTF_8, in.getBOM());
579         in.close();
580     }
581 
582     @Test
583     public void testReadWithoutBOM() throws Exception {
584         final byte[] data = new byte[] { 'A', 'B', 'C' };
585         final BOMInputStream in = new BOMInputStream(createUtf8DataStream(data, false));
586         assertEquals('A', in.read());
587         assertEquals('B', in.read());
588         assertEquals('C', in.read());
589         assertEquals(-1, in.read());
590         assertFalse("hasBOM()", in.hasBOM());
591         assertFalse("hasBOM(UTF-8)", in.hasBOM(ByteOrderMark.UTF_8));
592         assertNull("getBOM", in.getBOM());
593         in.close();
594     }
595 
596     @Test
597     public void testReadXmlWithBOMUcs2() throws Exception {
598         Assume.assumeFalse("This test does not pass on some IBM VMs xml parsers", System.getProperty("java.vendor").contains("IBM"));
599 
600         // UCS-2 is BE.
601         Assume.assumeTrue(Charset.isSupported("ISO-10646-UCS-2"));
602         final byte[] data = "<?xml version=\"1.0\" encoding=\"ISO-10646-UCS-2\"?><X/>".getBytes("ISO-10646-UCS-2");
603         parseXml(new BOMInputStream(createUtf16BeDataStream(data, true), ByteOrderMark.UTF_16BE));
604         parseXml(createUtf16BeDataStream(data, true));
605     }
606 
607     @Test
608     public void testReadXmlWithBOMUcs4() throws Exception {
609         // UCS-4 is BE or LE?
610         // Hm: ISO-10646-UCS-4 is not supported on Oracle 1.6.0_31
611         Assume.assumeTrue(Charset.isSupported("ISO-10646-UCS-4"));
612         final byte[] data = "<?xml version=\"1.0\" encoding=\"ISO-10646-UCS-4\"?><X/>".getBytes("ISO-10646-UCS-4");
613         // XML parser does not know what to do with UTF-32
614         parseXml(new BOMInputStream(createUtf32BeDataStream(data, true), ByteOrderMark.UTF_32BE));
615         // XML parser does not know what to do with UTF-32
616         Assume.assumeTrue("JVM and SAX need to support UTF_32LE for this", jvmAndSaxBothSupportCharset("UTF_32LE"));
617         parseXml(createUtf32BeDataStream(data, true));
618     }
619 
620     @Test
621     public void testReadXmlWithBOMUtf16Be() throws Exception {
622         final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-16BE\"?><X/>".getBytes(StandardCharsets.UTF_16BE);
623         parseXml(new BOMInputStream(createUtf16BeDataStream(data, true), ByteOrderMark.UTF_16BE));
624         parseXml(createUtf16BeDataStream(data, true));
625     }
626 
627     @Test
628     public void testReadXmlWithBOMUtf16Le() throws Exception {
629         final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-16LE\"?><X/>".getBytes(StandardCharsets.UTF_16LE);
630         parseXml(new BOMInputStream(createUtf16LeDataStream(data, true), ByteOrderMark.UTF_16LE));
631         parseXml(createUtf16LeDataStream(data, true));
632     }
633 
634     @Test
635     public void testReadXmlWithBOMUtf32Be() throws Exception {
636         Assume.assumeTrue("JVM and SAX need to support UTF_32BE for this", jvmAndSaxBothSupportCharset("UTF_32BE"));
637         final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-32BE\"?><X/>".getBytes("UTF_32BE");
638         parseXml(new BOMInputStream(createUtf32BeDataStream(data, true), ByteOrderMark.UTF_32BE));
639         // XML parser does not know what to do with UTF-32, so we warp the input stream with a XmlStreamReader
640         parseXml(new XmlStreamReader(createUtf32BeDataStream(data, true)));
641     }
642 
643     @Test
644     public void testReadXmlWithBOMUtf32Le() throws Exception {
645         Assume.assumeTrue("JVM and SAX need to support UTF_32LE for this", jvmAndSaxBothSupportCharset("UTF_32LE"));
646         final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-32LE\"?><X/>".getBytes("UTF_32LE");
647         parseXml(new BOMInputStream(createUtf32LeDataStream(data, true), ByteOrderMark.UTF_32LE));
648         // XML parser does not know what to do with UTF-32, so we warp the input stream with a XmlStreamReader
649         parseXml(new XmlStreamReader(createUtf32LeDataStream(data, true)));
650     }
651 
652     @Test
653     public void testReadXmlWithBOMUtf8() throws Exception {
654         final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><X/>".getBytes(StandardCharsets.UTF_8);
655         parseXml(new BOMInputStream(createUtf8DataStream(data, true)));
656         parseXml(createUtf8DataStream(data, true));
657     }
658 
659     @Test
660     public void testReadXmlWithoutBOMUtf32Be() throws Exception {
661         Assume.assumeTrue("JVM and SAX need to support UTF_32BE for this", jvmAndSaxBothSupportCharset("UTF_32BE"));
662         final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF_32BE\"?><X/>".getBytes("UTF_32BE");
663         parseXml(new BOMInputStream(createUtf32BeDataStream(data, false)));
664         parseXml(createUtf32BeDataStream(data, false));
665     }
666 
667     @Test
668     public void testReadXmlWithoutBOMUtf32Le() throws Exception {
669         Assume.assumeTrue("JVM and SAX need to support UTF_32LE for this", jvmAndSaxBothSupportCharset("UTF_32LE"));
670         final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-32LE\"?><X/>".getBytes("UTF_32LE");
671         parseXml(new BOMInputStream(createUtf32LeDataStream(data, false)));
672         parseXml(createUtf32BeDataStream(data, false));
673     }
674 
675     @Test
676     public void testSkipWithBOM() throws Exception {
677         final byte[] data = new byte[] { 'A', 'B', 'C', 'D' };
678         final InputStream in = new BOMInputStream(createUtf8DataStream(data, true));
679         in.skip(2L);
680         assertEquals('C', in.read());
681         in.close();
682     }
683 
684     @Test
685     public void testSkipWithoutBOM() throws Exception {
686         final byte[] data = new byte[] { 'A', 'B', 'C', 'D' };
687         final InputStream in = new BOMInputStream(createUtf8DataStream(data, false));
688         in.skip(2L);
689         assertEquals('C', in.read());
690         in.close();
691     }
692 
693 
694     @Test
695     public void skipReturnValueWithBom() throws IOException {
696         byte[] baseData = new byte[]{(byte) 0x31, (byte) 0x32, (byte) 0x33};
697         BOMInputStream is1 = new BOMInputStream(createUtf8DataStream(baseData, true));
698         assertEquals(2, is1.skip(2));
699         assertEquals((byte) 0x33, is1.read());
700         is1.close();
701     }
702 
703     @Test
704     public void skipReturnValueWithoutBom() throws IOException {
705         byte[] baseData = new byte[]{(byte) 0x31, (byte) 0x32, (byte) 0x33};
706         BOMInputStream is2 = new BOMInputStream(createUtf8DataStream(baseData, false));
707         assertEquals(2, is2.skip(2)); // IO-428
708         assertEquals((byte) 0x33, is2.read());
709         is2.close();
710     }
711 
712     @Test
713     public void testSmallBufferWithBOM() throws Exception {
714         final byte[] data = new byte[] { 'A', 'B', 'C' };
715         final InputStream in = new BOMInputStream(createUtf8DataStream(data, true));
716         final byte[] buf = new byte[1024];
717         assertData(new byte[] { 'A', 'B' }, buf, in.read(buf, 0, 2));
718         assertData(new byte[] { 'C' }, buf, in.read(buf, 0, 2));
719         in.close();
720     }
721 
722     @Test
723     public void testSmallBufferWithoutBOM() throws Exception {
724         final byte[] data = new byte[] { 'A', 'B', 'C' };
725         final InputStream in = new BOMInputStream(createUtf8DataStream(data, false));
726         final byte[] buf = new byte[1024];
727         assertData(new byte[] { 'A', 'B' }, buf, in.read(buf, 0, 2));
728         assertData(new byte[] { 'C' }, buf, in.read(buf, 0, 2));
729         in.close();
730     }
731 
732     @Test
733     // make sure that our support code works as expected
734     public void testSupportCode() throws Exception {
735         final InputStream in = createUtf8DataStream(new byte[] { 'A', 'B' }, true);
736         final byte[] buf = new byte[1024];
737         final int len = in.read(buf);
738         assertEquals(5, len);
739         assertEquals(0xEF, buf[0] & 0xFF);
740         assertEquals(0xBB, buf[1] & 0xFF);
741         assertEquals(0xBF, buf[2] & 0xFF);
742         assertEquals('A', buf[3] & 0xFF);
743         assertEquals('B', buf[4] & 0xFF);
744 
745         assertData(
746                 new byte[] { (byte) 0xEF, (byte) 0xBB, (byte) 0xBF, 'A', 'B' },
747                 buf, len);
748     }
749 
750     private boolean jvmAndSaxBothSupportCharset(String charSetName) throws ParserConfigurationException, SAXException, IOException {
751         return Charset.isSupported(charSetName) &&  doesSaxSupportCharacterSet(charSetName);
752     }
753 
754     private boolean doesSaxSupportCharacterSet(String charSetName) throws ParserConfigurationException, SAXException, IOException {
755         final byte[] data = ("<?xml version=\"1.0\" encoding=\"" + charSetName + "\"?><Z/>").getBytes(charSetName);
756         final DocumentBuilder documentBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
757         try {
758             final InputSource is = new InputSource(new ByteArrayInputStream(data));
759             is.setEncoding(charSetName);
760             documentBuilder.parse(is);
761         } catch (SAXParseException e) {
762             if (e.getMessage().contains(charSetName)) {
763                 return false;
764             }
765         }
766         return true;
767     }
768 }