View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.io.input;
18  
19  import static org.junit.Assert.assertEquals;
20  import static org.junit.Assert.assertFalse;
21  import static org.junit.Assert.assertNotNull;
22  import static org.junit.Assert.assertNull;
23  import static org.junit.Assert.assertTrue;
24  import static org.junit.Assert.fail;
25  
26  import java.io.ByteArrayInputStream;
27  import java.io.IOException;
28  import java.io.InputStream;
29  import java.io.Reader;
30  import java.nio.charset.Charset;
31  
32  import javax.xml.parsers.DocumentBuilderFactory;
33  import javax.xml.parsers.ParserConfigurationException;
34  
35  import org.apache.commons.io.ByteOrderMark;
36  import org.apache.commons.io.Charsets;
37  import org.junit.Assert;
38  import org.junit.Assume;
39  import org.junit.Test;
40  import org.w3c.dom.Document;
41  import org.xml.sax.InputSource;
42  import org.xml.sax.SAXException;
43  
44  /**
45   * Test case for {@link BOMInputStream}.
46   *
47   * @version $Id: BOMInputStreamTest.java 1471765 2013-04-24 23:20:29Z sebb $
48   */
49  public class BOMInputStreamTest {
50      //----------------------------------------------------------------------------
51      //  Support code
52      //----------------------------------------------------------------------------
53  
54      /**
55       *  A mock InputStream that expects <code>close()</code> to be called.
56       */
57      private static class ExpectCloseInputStream extends InputStream {
58          private boolean _closeCalled;
59  
60          public void assertCloseCalled() {
61              assertTrue(_closeCalled);
62          }
63  
64          @Override
65          public void close() throws IOException {
66              _closeCalled = true;
67          }
68  
69          @Override
70          public int read() throws IOException {
71              return -1;
72          }
73      }
74  
75      private void assertData(final byte[] expected, final byte[] actual, final int len)
76          throws Exception {
77          assertEquals("length", expected.length, len);
78          for (int ii = 0; ii < expected.length; ii++) {
79              assertEquals("byte " + ii, expected[ii], actual[ii]);
80          }
81      }
82  
83      /**
84       *  Creates the underlying data stream, with or without BOM.
85       */
86      private InputStream createUtf16BeDataStream(final byte[] baseData, final boolean addBOM) {
87          byte[] data = baseData;
88          if (addBOM) {
89              data = new byte[baseData.length + 2];
90              data[0] = (byte) 0xFE;
91              data[1] = (byte) 0xFF;
92              System.arraycopy(baseData, 0, data, 2, baseData.length);
93          }
94          return new ByteArrayInputStream(data);
95      }
96  
97      /**
98       *  Creates the underlying data stream, with or without BOM.
99       */
100     private InputStream createUtf16LeDataStream(final byte[] baseData, final boolean addBOM) {
101         byte[] data = baseData;
102         if (addBOM) {
103             data = new byte[baseData.length + 2];
104             data[0] = (byte) 0xFF;
105             data[1] = (byte) 0xFE;
106             System.arraycopy(baseData, 0, data, 2, baseData.length);
107         }
108         return new ByteArrayInputStream(data);
109     }
110 
111     /**
112      *  Creates the underlying data stream, with or without BOM.
113      */
114     private InputStream createUtf32BeDataStream(final byte[] baseData, final boolean addBOM) {
115         byte[] data = baseData;
116         if (addBOM) {
117             data = new byte[baseData.length + 4];
118             data[0] = 0;
119             data[1] = 0;
120             data[2] = (byte) 0xFE;
121             data[3] = (byte) 0xFF;
122             System.arraycopy(baseData, 0, data, 4, baseData.length);
123         }
124         return new ByteArrayInputStream(data);
125     }
126 
127     /**
128      *  Creates the underlying data stream, with or without BOM.
129      */
130     private InputStream createUtf32LeDataStream(final byte[] baseData, final boolean addBOM) {
131         byte[] data = baseData;
132         if (addBOM) {
133             data = new byte[baseData.length + 4];
134             data[0] = (byte) 0xFF;
135             data[1] = (byte) 0xFE;
136             data[2] = 0;
137             data[3] = 0;
138             System.arraycopy(baseData, 0, data, 4, baseData.length);
139         }
140         return new ByteArrayInputStream(data);
141     }
142 
143     /**
144      *  Creates the underlying data stream, with or without BOM.
145      */
146     private InputStream createUtf8DataStream(final byte[] baseData, final boolean addBOM) {
147         byte[] data = baseData;
148         if (addBOM) {
149             data = new byte[baseData.length + 3];
150             data[0] = (byte) 0xEF;
151             data[1] = (byte) 0xBB;
152             data[2] = (byte) 0xBF;
153             System.arraycopy(baseData, 0, data, 3, baseData.length);
154         }
155         return new ByteArrayInputStream(data);
156     }
157 
158     //----------------------------------------------------------------------------
159     //  Test cases
160     //----------------------------------------------------------------------------
161 
162     private void parseXml(final InputStream in) throws SAXException, IOException, ParserConfigurationException {
163         final Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(new InputSource(in));
164         assertNotNull(doc);
165         assertEquals("X", doc.getFirstChild().getNodeName());
166     }
167 
168     private void parseXml(final Reader in) throws SAXException, IOException, ParserConfigurationException {
169         final Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(new InputSource(in));
170         assertNotNull(doc);
171         assertEquals("X", doc.getFirstChild().getNodeName());
172     }
173 
174     private void readBOMInputStreamTwice(final String resource) throws Exception {
175         final InputStream inputStream = this.getClass().getResourceAsStream(resource);
176         Assert.assertNotNull(inputStream);
177         final BOMInputStream bomInputStream = new BOMInputStream(inputStream);
178         bomInputStream.mark(1000000);
179 
180         this.readFile(bomInputStream);
181         bomInputStream.reset();
182         this.readFile(bomInputStream);
183     }
184 
185     private void readFile(final BOMInputStream bomInputStream) throws Exception {
186         int bytes = 0;
187         final byte[] bytesFromStream = new byte[100];
188         do {
189             bytes = bomInputStream.read(bytesFromStream);
190         } while (bytes > 0);
191     }
192 
193     @Test
194     public void testAvailableWithBOM() throws Exception {
195         final byte[] data = new byte[] { 'A', 'B', 'C', 'D' };
196         final InputStream in = new BOMInputStream(createUtf8DataStream(data, true));
197         assertEquals(7, in.available());
198         in.close();
199     }
200 
201     @Test
202     public void testAvailableWithoutBOM() throws Exception {
203         final byte[] data = new byte[] { 'A', 'B', 'C', 'D' };
204         final InputStream in = new BOMInputStream(createUtf8DataStream(data, false));
205         assertEquals(4, in.available());
206         in.close();
207     }
208 
209     @Test
210     // this is here for coverage
211     public void testClose() throws Exception {
212         final ExpectCloseInputStream del = new ExpectCloseInputStream();
213         final InputStream in = new BOMInputStream(del);
214 
215         in.close();
216         del.assertCloseCalled();
217     }
218 
219     @Test
220     public void testEmptyBufferWithBOM() throws Exception {
221         final byte[] data = new byte[] {};
222         final InputStream in = new BOMInputStream(createUtf8DataStream(data, true));
223         final byte[] buf = new byte[1024];
224         assertEquals(-1, in.read(buf));
225         in.close();
226     }
227 
228     @Test
229     public void testEmptyBufferWithoutBOM() throws Exception {
230         final byte[] data = new byte[] {};
231         final InputStream in = new BOMInputStream(createUtf8DataStream(data, false));
232         final byte[] buf = new byte[1024];
233         assertEquals(-1, in.read(buf));
234         in.close();
235     }
236 
237     @Test
238     public void testGetBOMFirstThenRead() throws Exception {
239         final byte[] data = new byte[] { 'A', 'B', 'C' };
240         final BOMInputStream in = new BOMInputStream(createUtf8DataStream(data, true));
241         assertEquals("getBOM", ByteOrderMark.UTF_8, in.getBOM());
242         assertTrue("hasBOM()", in.hasBOM());
243         assertTrue("hasBOM(UTF-8)", in.hasBOM(ByteOrderMark.UTF_8));
244         assertEquals('A', in.read());
245         assertEquals('B', in.read());
246         assertEquals('C', in.read());
247         assertEquals(-1, in.read());
248         in.close();
249     }
250 
251     @Test
252     public void testGetBOMFirstThenReadInclude() throws Exception {
253         final byte[] data = new byte[] { 'A', 'B', 'C' };
254         final BOMInputStream in = new BOMInputStream(createUtf8DataStream(data, true), true);
255         assertTrue("hasBOM()", in.hasBOM());
256         assertTrue("hasBOM(UTF-8)", in.hasBOM(ByteOrderMark.UTF_8));
257         assertEquals("getBOM", ByteOrderMark.UTF_8, in.getBOM());
258         assertEquals(0xEF, in.read());
259         assertEquals(0xBB, in.read());
260         assertEquals(0xBF, in.read());
261         assertEquals('A', in.read());
262         assertEquals('B', in.read());
263         assertEquals('C', in.read());
264         assertEquals(-1, in.read());
265         in.close();
266     }
267 
268     @Test
269     public void testLargeBufferWithBOM() throws Exception {
270         final byte[] data = new byte[] { 'A', 'B', 'C' };
271         final InputStream in = new BOMInputStream(createUtf8DataStream(data, true));
272         final byte[] buf = new byte[1024];
273         assertData(data, buf, in.read(buf));
274         in.close();
275     }
276 
277     @Test
278     public void testLargeBufferWithoutBOM() throws Exception {
279         final byte[] data = new byte[] { 'A', 'B', 'C' };
280         final InputStream in = new BOMInputStream(createUtf8DataStream(data, false));
281         final byte[] buf = new byte[1024];
282         assertData(data, buf, in.read(buf));
283         in.close();
284     }
285 
286     @Test
287     public void testLeadingNonBOMBufferedRead() throws Exception {
288         final byte[] data = new byte[] { (byte) 0xEF, (byte) 0xAB, (byte) 0xCD };
289         final InputStream in = new BOMInputStream(createUtf8DataStream(data, false));
290         final byte[] buf = new byte[1024];
291         assertData(data, buf, in.read(buf));
292         in.close();
293     }
294 
295     @Test
296     public void testLeadingNonBOMSingleRead() throws Exception {
297         final byte[] data = new byte[] { (byte) 0xEF, (byte) 0xAB, (byte) 0xCD };
298         final InputStream in = new BOMInputStream(createUtf8DataStream(data, false));
299         assertEquals(0xEF, in.read());
300         assertEquals(0xAB, in.read());
301         assertEquals(0xCD, in.read());
302         assertEquals(-1, in.read());
303         in.close();
304     }
305 
306     @Test
307     public void testMarkResetAfterReadWithBOM() throws Exception {
308         final byte[] data = new byte[] { 'A', 'B', 'C', 'D' };
309         final InputStream in = new BOMInputStream(createUtf8DataStream(data, true));
310         assertTrue(in.markSupported());
311 
312         in.read();
313         in.mark(10);
314 
315         in.read();
316         in.read();
317         in.reset();
318         assertEquals('B', in.read());
319         in.close();
320     }
321 
322     @Test
323     public void testMarkResetAfterReadWithoutBOM() throws Exception {
324         final byte[] data = new byte[] { 'A', 'B', 'C', 'D' };
325         final InputStream in = new BOMInputStream(createUtf8DataStream(data, false));
326         assertTrue(in.markSupported());
327 
328         in.read();
329         in.mark(10);
330 
331         in.read();
332         in.read();
333         in.reset();
334         assertEquals('B', in.read());
335         in.close();
336     }
337 
338     @Test
339     public void testMarkResetBeforeReadWithBOM() throws Exception {
340         final byte[] data = new byte[] { 'A', 'B', 'C', 'D' };
341         final InputStream in = new BOMInputStream(createUtf8DataStream(data, true));
342         assertTrue(in.markSupported());
343 
344         in.mark(10);
345 
346         in.read();
347         in.read();
348         in.reset();
349         assertEquals('A', in.read());
350         in.close();
351     }
352 
353     @Test
354     public void testMarkResetBeforeReadWithoutBOM() throws Exception {
355         final byte[] data = new byte[] { 'A', 'B', 'C', 'D' };
356         final InputStream in = new BOMInputStream(createUtf8DataStream(data, false));
357         assertTrue(in.markSupported());
358 
359         in.mark(10);
360 
361         in.read();
362         in.read();
363         in.reset();
364         assertEquals('A', in.read());
365         in.close();
366     }
367 
368     @Test
369     public void testNoBoms() throws Exception {
370         final byte[] data = new byte[] { 'A', 'B', 'C' };
371         try {
372             new BOMInputStream(createUtf8DataStream(data, true), false, (ByteOrderMark[])null);
373             fail("Null BOMs, expected IllegalArgumentException");
374         } catch (final IllegalArgumentException e) {
375             // expected
376         }
377         try {
378             new BOMInputStream(createUtf8DataStream(data, true), false, new ByteOrderMark[0]);
379             fail("Null BOMs, expected IllegalArgumentException");
380         } catch (final IllegalArgumentException e) {
381             // expected
382         }
383     }
384 
385     @Test
386     public void testReadEmpty() throws Exception {
387         final byte[] data = new byte[] {};
388         final BOMInputStream in = new BOMInputStream(createUtf8DataStream(data, false));
389         assertEquals(-1, in.read());
390         assertFalse("hasBOM()", in.hasBOM());
391         assertFalse("hasBOM(UTF-8)", in.hasBOM(ByteOrderMark.UTF_8));
392         assertNull("getBOM", in.getBOM());
393         in.close();
394     }
395 
396     @Test
397     public void testReadSmall() throws Exception {
398         final byte[] data = new byte[] { 'A', 'B' };
399         final BOMInputStream in = new BOMInputStream(createUtf8DataStream(data, false));
400         assertEquals('A', in.read());
401         assertEquals('B', in.read());
402         assertEquals(-1, in.read());
403         assertFalse("hasBOM()", in.hasBOM());
404         assertFalse("hasBOM(UTF-8)", in.hasBOM(ByteOrderMark.UTF_8));
405         assertNull("getBOM", in.getBOM());
406         in.close();
407     }
408 
409     @Test
410     public void testReadTwiceWithBOM() throws Exception {
411         this.readBOMInputStreamTwice("/org/apache/commons/io/testfileBOM.xml");
412     }
413 
414     @Test
415     public void testReadTwiceWithoutBOM() throws Exception {
416         this.readBOMInputStreamTwice("/org/apache/commons/io/testfileNoBOM.xml");
417     }
418 
419     @Test
420     public void testReadWithBOMInclude() throws Exception {
421         final byte[] data = new byte[] { 'A', 'B', 'C' };
422         final BOMInputStream in = new BOMInputStream(createUtf8DataStream(data, true), true);
423         assertEquals(0xEF, in.read());
424         assertEquals(0xBB, in.read());
425         assertEquals(0xBF, in.read());
426         assertEquals('A', in.read());
427         assertEquals('B', in.read());
428         assertEquals('C', in.read());
429         assertEquals(-1, in.read());
430         assertTrue("hasBOM()", in.hasBOM());
431         assertTrue("hasBOM(UTF-8)", in.hasBOM(ByteOrderMark.UTF_8));
432         assertEquals("getBOM", ByteOrderMark.UTF_8, in.getBOM());
433         in.close();
434     }
435 
436     @Test
437     public void testReadWithBOMUtf16Be() throws Exception {
438         final byte[] data = "ABC".getBytes(Charsets.UTF_16BE);
439         final BOMInputStream in = new BOMInputStream(createUtf16BeDataStream(data, true), ByteOrderMark.UTF_16BE);
440         assertEquals(0, in.read());
441         assertEquals('A', in.read());
442         assertEquals(0, in.read());
443         assertEquals('B', in.read());
444         assertEquals(0, in.read());
445         assertEquals('C', in.read());
446         assertEquals(-1, in.read());
447         assertTrue("hasBOM()", in.hasBOM());
448         assertTrue("hasBOM(UTF-16BE)", in.hasBOM(ByteOrderMark.UTF_16BE));
449         assertEquals("getBOM", ByteOrderMark.UTF_16BE, in.getBOM());
450         try {
451             in.hasBOM(ByteOrderMark.UTF_16LE);
452             fail("Expected IllegalArgumentException");
453         } catch (final IllegalArgumentException e) {
454             // expected - not configured for UTF-16LE
455         }
456         in.close();
457     }
458 
459     @Test
460     public void testReadWithBOMUtf16Le() throws Exception {
461         final byte[] data = "ABC".getBytes(Charsets.UTF_16LE);
462         final BOMInputStream in = new BOMInputStream(createUtf16LeDataStream(data, true), ByteOrderMark.UTF_16LE);
463         assertEquals('A', in.read());
464         assertEquals(0, in.read());
465         assertEquals('B', in.read());
466         assertEquals(0, in.read());
467         assertEquals('C', in.read());
468         assertEquals(0, in.read());
469         assertEquals(-1, in.read());
470         assertTrue("hasBOM()", in.hasBOM());
471         assertTrue("hasBOM(UTF-16LE)", in.hasBOM(ByteOrderMark.UTF_16LE));
472         assertEquals("getBOM", ByteOrderMark.UTF_16LE, in.getBOM());
473         try {
474             in.hasBOM(ByteOrderMark.UTF_16BE);
475             fail("Expected IllegalArgumentException");
476         } catch (final IllegalArgumentException e) {
477             // expected - not configured for UTF-16BE
478         }
479         in.close();
480     }
481 
482     @Test
483     public void testReadWithBOMUtf32Be() throws Exception {
484         Assume.assumeTrue(Charset.isSupported("UTF_32BE"));
485         final byte[] data = "ABC".getBytes("UTF_32BE");
486         final BOMInputStream in = new BOMInputStream(createUtf32BeDataStream(data, true), ByteOrderMark.UTF_32BE);
487         assertEquals(0, in.read());
488         assertEquals(0, in.read());
489         assertEquals(0, in.read());
490         assertEquals('A', in.read());
491         assertEquals(0, in.read());
492         assertEquals(0, in.read());
493         assertEquals(0, in.read());
494         assertEquals('B', in.read());
495         assertEquals(0, in.read());
496         assertEquals(0, in.read());
497         assertEquals(0, in.read());
498         assertEquals('C', in.read());
499         assertEquals(-1, in.read());
500         assertTrue("hasBOM()", in.hasBOM());
501         assertTrue("hasBOM(UTF-32BE)", in.hasBOM(ByteOrderMark.UTF_32BE));
502         assertEquals("getBOM", ByteOrderMark.UTF_32BE, in.getBOM());
503         try {
504             in.hasBOM(ByteOrderMark.UTF_32LE);
505             fail("Expected IllegalArgumentException");
506         } catch (final IllegalArgumentException e) {
507             // expected - not configured for UTF-32LE
508         }
509         in.close();
510     }
511 
512     @Test
513     public void testReadWithBOMUtf32Le() throws Exception {
514         Assume.assumeTrue(Charset.isSupported("UTF_32LE"));
515         final byte[] data = "ABC".getBytes("UTF_32LE");
516         final BOMInputStream in = new BOMInputStream(createUtf32LeDataStream(data, true), ByteOrderMark.UTF_32LE);
517         assertEquals('A', in.read());
518         assertEquals(0, in.read());
519         assertEquals(0, in.read());
520         assertEquals(0, in.read());
521         assertEquals('B', in.read());
522         assertEquals(0, in.read());
523         assertEquals(0, in.read());
524         assertEquals(0, in.read());
525         assertEquals('C', in.read());
526         assertEquals(0, in.read());
527         assertEquals(0, in.read());
528         assertEquals(0, in.read());
529         assertEquals(-1, in.read());
530         assertTrue("hasBOM()", in.hasBOM());
531         assertTrue("hasBOM(UTF-32LE)", in.hasBOM(ByteOrderMark.UTF_32LE));
532         assertEquals("getBOM", ByteOrderMark.UTF_32LE, in.getBOM());
533         try {
534             in.hasBOM(ByteOrderMark.UTF_32BE);
535             fail("Expected IllegalArgumentException");
536         } catch (final IllegalArgumentException e) {
537             // expected - not configured for UTF-32BE
538         }
539         in.close();
540     }
541 
542     @Test
543     public void testReadWithBOMUtf8() throws Exception {
544         final byte[] data = "ABC".getBytes(Charsets.UTF_8);
545         final BOMInputStream in = new BOMInputStream(createUtf8DataStream(data, true), ByteOrderMark.UTF_8);
546         assertEquals('A', in.read());
547         assertEquals('B', in.read());
548         assertEquals('C', in.read());
549         assertEquals(-1, in.read());
550         assertTrue("hasBOM()", in.hasBOM());
551         assertTrue("hasBOM(UTF-8)", in.hasBOM(ByteOrderMark.UTF_8));
552         assertEquals("getBOM", ByteOrderMark.UTF_8, in.getBOM());
553         try {
554             in.hasBOM(ByteOrderMark.UTF_16BE);
555             fail("Expected IllegalArgumentException");
556         } catch (final IllegalArgumentException e) {
557             // expected - not configured for UTF-16BE
558         }
559         in.close();
560     }
561 
562     @Test
563     public void testReadWithMultipleBOM() throws Exception {
564         final byte[] data = new byte[] { 'A', 'B', 'C' };
565         final BOMInputStream in = new BOMInputStream(createUtf8DataStream(data, true),
566                                             ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_8);
567         assertEquals('A', in.read());
568         assertEquals('B', in.read());
569         assertEquals('C', in.read());
570         assertEquals(-1, in.read());
571         assertTrue("hasBOM()", in.hasBOM());
572         assertTrue("hasBOM(UTF-8)", in.hasBOM(ByteOrderMark.UTF_8));
573         assertFalse("hasBOM(UTF-16BE)", in.hasBOM(ByteOrderMark.UTF_16BE));
574         assertEquals("getBOM", ByteOrderMark.UTF_8, in.getBOM());
575         in.close();
576     }
577 
578     @Test
579     public void testReadWithoutBOM() throws Exception {
580         final byte[] data = new byte[] { 'A', 'B', 'C' };
581         final BOMInputStream in = new BOMInputStream(createUtf8DataStream(data, false));
582         assertEquals('A', in.read());
583         assertEquals('B', in.read());
584         assertEquals('C', in.read());
585         assertEquals(-1, in.read());
586         assertFalse("hasBOM()", in.hasBOM());
587         assertFalse("hasBOM(UTF-8)", in.hasBOM(ByteOrderMark.UTF_8));
588         assertNull("getBOM", in.getBOM());
589         in.close();
590     }
591 
592     @Test
593     public void testReadXmlWithBOMUcs2() throws Exception {
594         // UCS-2 is BE.
595         Assume.assumeTrue(Charset.isSupported("ISO-10646-UCS-2"));
596         final byte[] data = "<?xml version=\"1.0\" encoding=\"ISO-10646-UCS-2\"?><X/>".getBytes("ISO-10646-UCS-2");
597         parseXml(new BOMInputStream(createUtf16BeDataStream(data, true), ByteOrderMark.UTF_16BE));
598         parseXml(createUtf16BeDataStream(data, true));
599     }
600 
601     @Test
602     public void testReadXmlWithBOMUcs4() throws Exception {
603         // UCS-4 is BE or LE?
604         // Hm: ISO-10646-UCS-4 is not supported on Oracle 1.6.0_31
605         Assume.assumeTrue(Charset.isSupported("ISO-10646-UCS-4"));
606         final byte[] data = "<?xml version=\"1.0\" encoding=\"ISO-10646-UCS-4\"?><X/>".getBytes("ISO-10646-UCS-4");
607         // XML parser does not know what to do with UTF-32
608         parseXml(new BOMInputStream(createUtf32BeDataStream(data, true), ByteOrderMark.UTF_32BE));
609         // XML parser does not know what to do with UTF-32
610         parseXml(createUtf32BeDataStream(data, true));
611     }
612 
613     @Test
614     public void testReadXmlWithBOMUtf16Be() throws Exception {
615         final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-16BE\"?><X/>".getBytes(Charsets.UTF_16BE);
616         parseXml(new BOMInputStream(createUtf16BeDataStream(data, true), ByteOrderMark.UTF_16BE));
617         parseXml(createUtf16BeDataStream(data, true));
618     }
619 
620     @Test
621     public void testReadXmlWithBOMUtf16Le() throws Exception {
622         final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-16LE\"?><X/>".getBytes(Charsets.UTF_16LE);
623         parseXml(new BOMInputStream(createUtf16LeDataStream(data, true), ByteOrderMark.UTF_16LE));
624         parseXml(createUtf16LeDataStream(data, true));
625     }
626 
627     @Test
628     public void testReadXmlWithBOMUtf32Be() throws Exception {
629         Assume.assumeTrue(Charset.isSupported("UTF_32BE"));
630         final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-32BE\"?><X/>".getBytes("UTF_32BE");
631         parseXml(new BOMInputStream(createUtf32BeDataStream(data, true), ByteOrderMark.UTF_32BE));
632         // XML parser does not know what to do with UTF-32, so we warp the input stream with a XmlStreamReader
633         parseXml(new XmlStreamReader(createUtf32BeDataStream(data, true)));
634     }
635 
636     @Test
637     public void testReadXmlWithBOMUtf32Le() throws Exception {
638         Assume.assumeTrue(Charset.isSupported("UTF_32LE"));
639         final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-32LE\"?><X/>".getBytes("UTF_32LE");
640         parseXml(new BOMInputStream(createUtf32LeDataStream(data, true), ByteOrderMark.UTF_32LE));
641         // XML parser does not know what to do with UTF-32, so we warp the input stream with a XmlStreamReader
642         parseXml(new XmlStreamReader(createUtf32LeDataStream(data, true)));
643     }
644 
645     @Test
646     public void testReadXmlWithBOMUtf8() throws Exception {
647         final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><X/>".getBytes(Charsets.UTF_8);
648         parseXml(new BOMInputStream(createUtf8DataStream(data, true)));
649         parseXml(createUtf8DataStream(data, true));
650     }
651 
652     @Test
653     public void testReadXmlWithoutBOMUtf32Be() throws Exception {
654         Assume.assumeTrue(Charset.isSupported("UTF_32BE"));
655         final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-32BE\"?><X/>".getBytes("UTF_32BE");
656         parseXml(new BOMInputStream(createUtf32BeDataStream(data, false)));
657         parseXml(createUtf32BeDataStream(data, false));
658     }
659 
660     @Test
661     public void testReadXmlWithoutBOMUtf32Le() throws Exception {
662         Assume.assumeTrue(Charset.isSupported("UTF_32LE"));
663         final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-32LE\"?><X/>".getBytes("UTF_32LE");
664         parseXml(new BOMInputStream(createUtf32LeDataStream(data, false)));
665         parseXml(createUtf32BeDataStream(data, false));
666     }
667 
668     @Test
669     public void testSkipWithBOM() throws Exception {
670         final byte[] data = new byte[] { 'A', 'B', 'C', 'D' };
671         final InputStream in = new BOMInputStream(createUtf8DataStream(data, true));
672         in.skip(2L);
673         assertEquals('C', in.read());
674         in.close();
675     }
676 
677     @Test
678     public void testSkipWithoutBOM() throws Exception {
679         final byte[] data = new byte[] { 'A', 'B', 'C', 'D' };
680         final InputStream in = new BOMInputStream(createUtf8DataStream(data, false));
681         in.skip(2L);
682         assertEquals('C', in.read());
683         in.close();
684     }
685 
686     @Test
687     public void testSmallBufferWithBOM() throws Exception {
688         final byte[] data = new byte[] { 'A', 'B', 'C' };
689         final InputStream in = new BOMInputStream(createUtf8DataStream(data, true));
690         final byte[] buf = new byte[1024];
691         assertData(new byte[] { 'A', 'B' }, buf, in.read(buf, 0, 2));
692         assertData(new byte[] { 'C' }, buf, in.read(buf, 0, 2));
693         in.close();
694     }
695 
696     @Test
697     public void testSmallBufferWithoutBOM() throws Exception {
698         final byte[] data = new byte[] { 'A', 'B', 'C' };
699         final InputStream in = new BOMInputStream(createUtf8DataStream(data, false));
700         final byte[] buf = new byte[1024];
701         assertData(new byte[] { 'A', 'B' }, buf, in.read(buf, 0, 2));
702         assertData(new byte[] { 'C' }, buf, in.read(buf, 0, 2));
703         in.close();
704     }
705 
706     @Test
707     // make sure that our support code works as expected
708     public void testSupportCode() throws Exception {
709         final InputStream in = createUtf8DataStream(new byte[] { 'A', 'B' }, true);
710         final byte[] buf = new byte[1024];
711         final int len = in.read(buf);
712         assertEquals(5, len);
713         assertEquals(0xEF, buf[0] & 0xFF);
714         assertEquals(0xBB, buf[1] & 0xFF);
715         assertEquals(0xBF, buf[2] & 0xFF);
716         assertEquals('A', buf[3] & 0xFF);
717         assertEquals('B', buf[4] & 0xFF);
718 
719         assertData(
720                 new byte[] { (byte) 0xEF, (byte) 0xBB, (byte) 0xBF, 'A', 'B' },
721                 buf, len);
722     }
723 }