View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.io.input;
18  
19  import static org.junit.Assert.*;
20  
21  import java.io.ByteArrayInputStream;
22  import java.io.IOException;
23  import java.io.InputStream;
24  import java.io.Reader;
25  import java.nio.charset.Charset;
26  
27  import org.apache.commons.io.ByteOrderMark;
28  import org.apache.commons.io.Charsets;
29  import org.junit.Assert;
30  import org.junit.Assume;
31  import org.junit.Test;
32  import org.w3c.dom.Document;
33  import org.xml.sax.InputSource;
34  import org.xml.sax.SAXException;
35  import org.xml.sax.SAXParseException;
36  
37  import javax.xml.parsers.DocumentBuilder;
38  import javax.xml.parsers.DocumentBuilderFactory;
39  import javax.xml.parsers.ParserConfigurationException;
40  
41  /**
42   * Test case for {@link BOMInputStream}.
43   *
44   * @version $Id: BOMInputStreamTest.java 1721094 2015-12-21 05:51:18Z krosenvold $
45   */
46  @SuppressWarnings("ResultOfMethodCallIgnored")
47  public class BOMInputStreamTest {
48      //----------------------------------------------------------------------------
49      //  Support code
50      //----------------------------------------------------------------------------
51  
52      /**
53       *  A mock InputStream that expects <code>close()</code> to be called.
54       */
55      private static class ExpectCloseInputStream extends InputStream {
56          private boolean _closeCalled;
57  
58          public void assertCloseCalled() {
59              assertTrue(_closeCalled);
60          }
61  
62          @Override
63          public void close() throws IOException {
64              _closeCalled = true;
65          }
66  
67          @Override
68          public int read() throws IOException {
69              return -1;
70          }
71      }
72  
73      private void assertData(final byte[] expected, final byte[] actual, final int len)
74          throws Exception {
75          assertEquals("length", expected.length, len);
76          for (int ii = 0; ii < expected.length; ii++) {
77              assertEquals("byte " + ii, expected[ii], actual[ii]);
78          }
79      }
80  
81      /**
82       *  Creates the underlying data stream, with or without BOM.
83       */
84      private InputStream createUtf16BeDataStream(final byte[] baseData, final boolean addBOM) {
85          byte[] data = baseData;
86          if (addBOM) {
87              data = new byte[baseData.length + 2];
88              data[0] = (byte) 0xFE;
89              data[1] = (byte) 0xFF;
90              System.arraycopy(baseData, 0, data, 2, baseData.length);
91          }
92          return new ByteArrayInputStream(data);
93      }
94  
95      /**
96       *  Creates the underlying data stream, with or without BOM.
97       */
98      private InputStream createUtf16LeDataStream(final byte[] baseData, final boolean addBOM) {
99          byte[] data = baseData;
100         if (addBOM) {
101             data = new byte[baseData.length + 2];
102             data[0] = (byte) 0xFF;
103             data[1] = (byte) 0xFE;
104             System.arraycopy(baseData, 0, data, 2, baseData.length);
105         }
106         return new ByteArrayInputStream(data);
107     }
108 
109     /**
110      *  Creates the underlying data stream, with or without BOM.
111      */
112     private InputStream createUtf32BeDataStream(final byte[] baseData, final boolean addBOM) {
113         byte[] data = baseData;
114         if (addBOM) {
115             data = new byte[baseData.length + 4];
116             data[0] = 0;
117             data[1] = 0;
118             data[2] = (byte) 0xFE;
119             data[3] = (byte) 0xFF;
120             System.arraycopy(baseData, 0, data, 4, baseData.length);
121         }
122         return new ByteArrayInputStream(data);
123     }
124 
125     /**
126      *  Creates the underlying data stream, with or without BOM.
127      */
128     private InputStream createUtf32LeDataStream(final byte[] baseData, final boolean addBOM) {
129         byte[] data = baseData;
130         if (addBOM) {
131             data = new byte[baseData.length + 4];
132             data[0] = (byte) 0xFF;
133             data[1] = (byte) 0xFE;
134             data[2] = 0;
135             data[3] = 0;
136             System.arraycopy(baseData, 0, data, 4, baseData.length);
137         }
138         return new ByteArrayInputStream(data);
139     }
140 
141     /**
142      *  Creates the underlying data stream, with or without BOM.
143      */
144     private InputStream createUtf8DataStream(final byte[] baseData, final boolean addBOM) {
145         byte[] data = baseData;
146         if (addBOM) {
147             data = new byte[baseData.length + 3];
148             data[0] = (byte) 0xEF;
149             data[1] = (byte) 0xBB;
150             data[2] = (byte) 0xBF;
151             System.arraycopy(baseData, 0, data, 3, baseData.length);
152         }
153         return new ByteArrayInputStream(data);
154     }
155 
156     //----------------------------------------------------------------------------
157     //  Test cases
158     //----------------------------------------------------------------------------
159 
160     private void parseXml(final InputStream in) throws SAXException, IOException, ParserConfigurationException {
161         final Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(new InputSource(in));
162         assertNotNull(doc);
163         assertEquals("X", doc.getFirstChild().getNodeName());
164     }
165 
166     private void parseXml(final Reader in) throws SAXException, IOException, ParserConfigurationException {
167         final Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(new InputSource(in));
168         assertNotNull(doc);
169         assertEquals("X", doc.getFirstChild().getNodeName());
170     }
171 
172     private void readBOMInputStreamTwice(final String resource) throws Exception {
173         final InputStream inputStream = this.getClass().getResourceAsStream(resource);
174         Assert.assertNotNull(inputStream);
175         final BOMInputStream bomInputStream = new BOMInputStream(inputStream);
176         bomInputStream.mark(1000000);
177 
178         this.readFile(bomInputStream);
179         bomInputStream.reset();
180         this.readFile(bomInputStream);
181         inputStream.close();
182         bomInputStream.close();
183     }
184 
185     private void readFile(final BOMInputStream bomInputStream) throws Exception {
186         int bytes;
187         final byte[] bytesFromStream = new byte[100];
188         do {
189             bytes = bomInputStream.read(bytesFromStream);
190         } while (bytes > 0);
191     }
192 
193     @Test
194     public void testAvailableWithBOM() throws Exception {
195         final byte[] data = new byte[] { 'A', 'B', 'C', 'D' };
196         final InputStream in = new BOMInputStream(createUtf8DataStream(data, true));
197         assertEquals(7, in.available());
198         in.close();
199     }
200 
201     @Test
202     public void testAvailableWithoutBOM() throws Exception {
203         final byte[] data = new byte[] { 'A', 'B', 'C', 'D' };
204         final InputStream in = new BOMInputStream(createUtf8DataStream(data, false));
205         assertEquals(4, in.available());
206         in.close();
207     }
208 
209     @Test
210     // this is here for coverage
211     public void testClose() throws Exception {
212         final ExpectCloseInputStream del = new ExpectCloseInputStream();
213         final InputStream in = new BOMInputStream(del);
214 
215         in.close();
216         del.assertCloseCalled();
217         del.close();
218     }
219 
220     @Test
221     public void testEmptyBufferWithBOM() throws Exception {
222         final byte[] data = new byte[] {};
223         final InputStream in = new BOMInputStream(createUtf8DataStream(data, true));
224         final byte[] buf = new byte[1024];
225         assertEquals(-1, in.read(buf));
226         in.close();
227     }
228 
229     @Test
230     public void testEmptyBufferWithoutBOM() throws Exception {
231         final byte[] data = new byte[] {};
232         final InputStream in = new BOMInputStream(createUtf8DataStream(data, false));
233         final byte[] buf = new byte[1024];
234         assertEquals(-1, in.read(buf));
235         in.close();
236     }
237 
238     @Test
239     public void testGetBOMFirstThenRead() throws Exception {
240         final byte[] data = new byte[] { 'A', 'B', 'C' };
241         final BOMInputStream in = new BOMInputStream(createUtf8DataStream(data, true));
242         assertEquals("getBOM", ByteOrderMark.UTF_8, in.getBOM());
243         assertTrue("hasBOM()", in.hasBOM());
244         assertTrue("hasBOM(UTF-8)", in.hasBOM(ByteOrderMark.UTF_8));
245         assertEquals('A', in.read());
246         assertEquals('B', in.read());
247         assertEquals('C', in.read());
248         assertEquals(-1, in.read());
249         in.close();
250     }
251 
252     @Test
253     public void testGetBOMFirstThenReadInclude() throws Exception {
254         final byte[] data = new byte[] { 'A', 'B', 'C' };
255         final BOMInputStream in = new BOMInputStream(createUtf8DataStream(data, true), true);
256         assertTrue("hasBOM()", in.hasBOM());
257         assertTrue("hasBOM(UTF-8)", in.hasBOM(ByteOrderMark.UTF_8));
258         assertEquals("getBOM", ByteOrderMark.UTF_8, in.getBOM());
259         assertEquals(0xEF, in.read());
260         assertEquals(0xBB, in.read());
261         assertEquals(0xBF, in.read());
262         assertEquals('A', in.read());
263         assertEquals('B', in.read());
264         assertEquals('C', in.read());
265         assertEquals(-1, in.read());
266         in.close();
267     }
268 
269     @Test
270     public void testLargeBufferWithBOM() throws Exception {
271         final byte[] data = new byte[] { 'A', 'B', 'C' };
272         final InputStream in = new BOMInputStream(createUtf8DataStream(data, true));
273         final byte[] buf = new byte[1024];
274         assertData(data, buf, in.read(buf));
275         in.close();
276     }
277 
278     @Test
279     public void testLargeBufferWithoutBOM() throws Exception {
280         final byte[] data = new byte[] { 'A', 'B', 'C' };
281         final InputStream in = new BOMInputStream(createUtf8DataStream(data, false));
282         final byte[] buf = new byte[1024];
283         assertData(data, buf, in.read(buf));
284         in.close();
285     }
286 
287     @Test
288     public void testLeadingNonBOMBufferedRead() throws Exception {
289         final byte[] data = new byte[] { (byte) 0xEF, (byte) 0xAB, (byte) 0xCD };
290         final InputStream in = new BOMInputStream(createUtf8DataStream(data, false));
291         final byte[] buf = new byte[1024];
292         assertData(data, buf, in.read(buf));
293         in.close();
294     }
295 
296     @Test
297     public void testLeadingNonBOMSingleRead() throws Exception {
298         final byte[] data = new byte[] { (byte) 0xEF, (byte) 0xAB, (byte) 0xCD };
299         final InputStream in = new BOMInputStream(createUtf8DataStream(data, false));
300         assertEquals(0xEF, in.read());
301         assertEquals(0xAB, in.read());
302         assertEquals(0xCD, in.read());
303         assertEquals(-1, in.read());
304         in.close();
305     }
306 
307     @Test
308     public void testMarkResetAfterReadWithBOM() throws Exception {
309         final byte[] data = new byte[] { 'A', 'B', 'C', 'D' };
310         final InputStream in = new BOMInputStream(createUtf8DataStream(data, true));
311         assertTrue(in.markSupported());
312 
313         in.read();
314         in.mark(10);
315 
316         in.read();
317         in.read();
318         in.reset();
319         assertEquals('B', in.read());
320         in.close();
321     }
322 
323     @Test
324     public void testMarkResetAfterReadWithoutBOM() throws Exception {
325         final byte[] data = new byte[] { 'A', 'B', 'C', 'D' };
326         final InputStream in = new BOMInputStream(createUtf8DataStream(data, false));
327         assertTrue(in.markSupported());
328 
329         in.read();
330         in.mark(10);
331 
332         in.read();
333         in.read();
334         in.reset();
335         assertEquals('B', in.read());
336         in.close();
337     }
338 
339     @Test
340     public void testMarkResetBeforeReadWithBOM() throws Exception {
341         final byte[] data = new byte[] { 'A', 'B', 'C', 'D' };
342         final InputStream in = new BOMInputStream(createUtf8DataStream(data, true));
343         assertTrue(in.markSupported());
344 
345         in.mark(10);
346 
347         in.read();
348         in.read();
349         in.reset();
350         assertEquals('A', in.read());
351         in.close();
352     }
353 
354     @Test
355     public void testMarkResetBeforeReadWithoutBOM() throws Exception {
356         final byte[] data = new byte[] { 'A', 'B', 'C', 'D' };
357         final InputStream in = new BOMInputStream(createUtf8DataStream(data, false));
358         assertTrue(in.markSupported());
359 
360         in.mark(10);
361 
362         in.read();
363         in.read();
364         in.reset();
365         assertEquals('A', in.read());
366         in.close();
367     }
368 
369     @Test
370     public void testNoBoms() throws Exception {
371         final byte[] data = new byte[] { 'A', 'B', 'C' };
372         try {
373             (new BOMInputStream(createUtf8DataStream(data, true), false, (ByteOrderMark[])null)).close();
374             fail("Null BOMs, expected IllegalArgumentException");
375         } catch (final IllegalArgumentException e) {
376             // expected
377         }
378         try {
379             (new BOMInputStream(createUtf8DataStream(data, true), false, new ByteOrderMark[0])).close();
380             fail("Null BOMs, expected IllegalArgumentException");
381         } catch (final IllegalArgumentException e) {
382             // expected
383         }
384     }
385 
386 
387 
388 
389 
390     @Test
391     public void testReadEmpty() throws Exception {
392         final byte[] data = new byte[] {};
393         final BOMInputStream in = new BOMInputStream(createUtf8DataStream(data, false));
394         assertEquals(-1, in.read());
395         assertFalse("hasBOM()", in.hasBOM());
396         assertFalse("hasBOM(UTF-8)", in.hasBOM(ByteOrderMark.UTF_8));
397         assertNull("getBOM", in.getBOM());
398         in.close();
399     }
400 
401     @Test
402     public void testReadSmall() throws Exception {
403         final byte[] data = new byte[] { 'A', 'B' };
404         final BOMInputStream in = new BOMInputStream(createUtf8DataStream(data, false));
405         assertEquals('A', in.read());
406         assertEquals('B', in.read());
407         assertEquals(-1, in.read());
408         assertFalse("hasBOM()", in.hasBOM());
409         assertFalse("hasBOM(UTF-8)", in.hasBOM(ByteOrderMark.UTF_8));
410         assertNull("getBOM", in.getBOM());
411         in.close();
412     }
413 
414     @Test
415     public void testReadTwiceWithBOM() throws Exception {
416         this.readBOMInputStreamTwice("/org/apache/commons/io/testfileBOM.xml");
417     }
418 
419     @Test
420     public void testReadTwiceWithoutBOM() throws Exception {
421         this.readBOMInputStreamTwice("/org/apache/commons/io/testfileNoBOM.xml");
422     }
423 
424     @Test
425     public void testReadWithBOMInclude() throws Exception {
426         final byte[] data = new byte[] { 'A', 'B', 'C' };
427         final BOMInputStream in = new BOMInputStream(createUtf8DataStream(data, true), true);
428         assertEquals(0xEF, in.read());
429         assertEquals(0xBB, in.read());
430         assertEquals(0xBF, in.read());
431         assertEquals('A', in.read());
432         assertEquals('B', in.read());
433         assertEquals('C', in.read());
434         assertEquals(-1, in.read());
435         assertTrue("hasBOM()", in.hasBOM());
436         assertTrue("hasBOM(UTF-8)", in.hasBOM(ByteOrderMark.UTF_8));
437         assertEquals("getBOM", ByteOrderMark.UTF_8, in.getBOM());
438         in.close();
439     }
440 
441     @Test
442     public void testReadWithBOMUtf16Be() throws Exception {
443         @SuppressWarnings("deprecation") // unavoidable until Java 7
444         final byte[] data = "ABC".getBytes(Charsets.UTF_16BE);
445         final BOMInputStream in = new BOMInputStream(createUtf16BeDataStream(data, true), ByteOrderMark.UTF_16BE);
446         assertEquals(0, in.read());
447         assertEquals('A', in.read());
448         assertEquals(0, in.read());
449         assertEquals('B', in.read());
450         assertEquals(0, in.read());
451         assertEquals('C', in.read());
452         assertEquals(-1, in.read());
453         assertTrue("hasBOM()", in.hasBOM());
454         assertTrue("hasBOM(UTF-16BE)", in.hasBOM(ByteOrderMark.UTF_16BE));
455         assertEquals("getBOM", ByteOrderMark.UTF_16BE, in.getBOM());
456         try {
457             in.hasBOM(ByteOrderMark.UTF_16LE);
458             fail("Expected IllegalArgumentException");
459         } catch (final IllegalArgumentException e) {
460             // expected - not configured for UTF-16LE
461         }
462         in.close();
463     }
464 
465     @Test
466     public void testReadWithBOMUtf16Le() throws Exception {
467         @SuppressWarnings("deprecation") // unavoidable until Java 7
468         final byte[] data = "ABC".getBytes(Charsets.UTF_16LE);
469         final BOMInputStream in = new BOMInputStream(createUtf16LeDataStream(data, true), ByteOrderMark.UTF_16LE);
470         assertEquals('A', in.read());
471         assertEquals(0, in.read());
472         assertEquals('B', in.read());
473         assertEquals(0, in.read());
474         assertEquals('C', in.read());
475         assertEquals(0, in.read());
476         assertEquals(-1, in.read());
477         assertTrue("hasBOM()", in.hasBOM());
478         assertTrue("hasBOM(UTF-16LE)", in.hasBOM(ByteOrderMark.UTF_16LE));
479         assertEquals("getBOM", ByteOrderMark.UTF_16LE, in.getBOM());
480         try {
481             in.hasBOM(ByteOrderMark.UTF_16BE);
482             fail("Expected IllegalArgumentException");
483         } catch (final IllegalArgumentException e) {
484             // expected - not configured for UTF-16BE
485         }
486         in.close();
487     }
488 
489     @Test
490     public void testReadWithBOMUtf32Be() throws Exception {
491         Assume.assumeTrue(Charset.isSupported("UTF_32BE"));
492         final byte[] data = "ABC".getBytes("UTF_32BE");
493         final BOMInputStream in = new BOMInputStream(createUtf32BeDataStream(data, true), ByteOrderMark.UTF_32BE);
494         assertEquals(0, in.read());
495         assertEquals(0, in.read());
496         assertEquals(0, in.read());
497         assertEquals('A', in.read());
498         assertEquals(0, in.read());
499         assertEquals(0, in.read());
500         assertEquals(0, in.read());
501         assertEquals('B', in.read());
502         assertEquals(0, in.read());
503         assertEquals(0, in.read());
504         assertEquals(0, in.read());
505         assertEquals('C', in.read());
506         assertEquals(-1, in.read());
507         assertTrue("hasBOM()", in.hasBOM());
508         assertTrue("hasBOM(UTF-32BE)", in.hasBOM(ByteOrderMark.UTF_32BE));
509         assertEquals("getBOM", ByteOrderMark.UTF_32BE, in.getBOM());
510         try {
511             in.hasBOM(ByteOrderMark.UTF_32LE);
512             fail("Expected IllegalArgumentException");
513         } catch (final IllegalArgumentException e) {
514             // expected - not configured for UTF-32LE
515         }
516         in.close();
517     }
518 
519     @Test
520     public void testReadWithBOMUtf32Le() throws Exception {
521         Assume.assumeTrue(Charset.isSupported("UTF_32LE"));
522         final byte[] data = "ABC".getBytes("UTF_32LE");
523         final BOMInputStream in = new BOMInputStream(createUtf32LeDataStream(data, true), ByteOrderMark.UTF_32LE);
524         assertEquals('A', in.read());
525         assertEquals(0, in.read());
526         assertEquals(0, in.read());
527         assertEquals(0, in.read());
528         assertEquals('B', in.read());
529         assertEquals(0, in.read());
530         assertEquals(0, in.read());
531         assertEquals(0, in.read());
532         assertEquals('C', in.read());
533         assertEquals(0, in.read());
534         assertEquals(0, in.read());
535         assertEquals(0, in.read());
536         assertEquals(-1, in.read());
537         assertTrue("hasBOM()", in.hasBOM());
538         assertTrue("hasBOM(UTF-32LE)", in.hasBOM(ByteOrderMark.UTF_32LE));
539         assertEquals("getBOM", ByteOrderMark.UTF_32LE, in.getBOM());
540         try {
541             in.hasBOM(ByteOrderMark.UTF_32BE);
542             fail("Expected IllegalArgumentException");
543         } catch (final IllegalArgumentException e) {
544             // expected - not configured for UTF-32BE
545         }
546         in.close();
547     }
548 
549     @Test
550     public void testReadWithBOMUtf8() throws Exception {
551         @SuppressWarnings("deprecation") // unavoidable until Java 7
552         final byte[] data = "ABC".getBytes(Charsets.UTF_8);
553         final BOMInputStream in = new BOMInputStream(createUtf8DataStream(data, true), ByteOrderMark.UTF_8);
554         assertEquals('A', in.read());
555         assertEquals('B', in.read());
556         assertEquals('C', in.read());
557         assertEquals(-1, in.read());
558         assertTrue("hasBOM()", in.hasBOM());
559         assertTrue("hasBOM(UTF-8)", in.hasBOM(ByteOrderMark.UTF_8));
560         assertEquals("getBOM", ByteOrderMark.UTF_8, in.getBOM());
561         try {
562             in.hasBOM(ByteOrderMark.UTF_16BE);
563             fail("Expected IllegalArgumentException");
564         } catch (final IllegalArgumentException e) {
565             // expected - not configured for UTF-16BE
566         }
567         in.close();
568     }
569 
570     @Test
571     public void testReadWithMultipleBOM() throws Exception {
572         final byte[] data = new byte[] { 'A', 'B', 'C' };
573         final BOMInputStream in = new BOMInputStream(createUtf8DataStream(data, true),
574                                             ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_8);
575         assertEquals('A', in.read());
576         assertEquals('B', in.read());
577         assertEquals('C', in.read());
578         assertEquals(-1, in.read());
579         assertTrue("hasBOM()", in.hasBOM());
580         assertTrue("hasBOM(UTF-8)", in.hasBOM(ByteOrderMark.UTF_8));
581         assertFalse("hasBOM(UTF-16BE)", in.hasBOM(ByteOrderMark.UTF_16BE));
582         assertEquals("getBOM", ByteOrderMark.UTF_8, in.getBOM());
583         in.close();
584     }
585 
586     @Test
587     public void testReadWithoutBOM() throws Exception {
588         final byte[] data = new byte[] { 'A', 'B', 'C' };
589         final BOMInputStream in = new BOMInputStream(createUtf8DataStream(data, false));
590         assertEquals('A', in.read());
591         assertEquals('B', in.read());
592         assertEquals('C', in.read());
593         assertEquals(-1, in.read());
594         assertFalse("hasBOM()", in.hasBOM());
595         assertFalse("hasBOM(UTF-8)", in.hasBOM(ByteOrderMark.UTF_8));
596         assertNull("getBOM", in.getBOM());
597         in.close();
598     }
599 
600     @Test
601     public void testReadXmlWithBOMUcs2() throws Exception {
602         Assume.assumeFalse("This test does not pass on some IBM VMs xml parsers", System.getProperty("java.vendor").contains("IBM"));
603 
604         // UCS-2 is BE.
605         Assume.assumeTrue(Charset.isSupported("ISO-10646-UCS-2"));
606         final byte[] data = "<?xml version=\"1.0\" encoding=\"ISO-10646-UCS-2\"?><X/>".getBytes("ISO-10646-UCS-2");
607         parseXml(new BOMInputStream(createUtf16BeDataStream(data, true), ByteOrderMark.UTF_16BE));
608         parseXml(createUtf16BeDataStream(data, true));
609     }
610 
611     @Test
612     public void testReadXmlWithBOMUcs4() throws Exception {
613         // UCS-4 is BE or LE?
614         // Hm: ISO-10646-UCS-4 is not supported on Oracle 1.6.0_31
615         Assume.assumeTrue(Charset.isSupported("ISO-10646-UCS-4"));
616         final byte[] data = "<?xml version=\"1.0\" encoding=\"ISO-10646-UCS-4\"?><X/>".getBytes("ISO-10646-UCS-4");
617         // XML parser does not know what to do with UTF-32
618         parseXml(new BOMInputStream(createUtf32BeDataStream(data, true), ByteOrderMark.UTF_32BE));
619         // XML parser does not know what to do with UTF-32
620         Assume.assumeTrue("JVM and SAX need to support UTF_32LE for this", jvmAndSaxBothSupportCharset("UTF_32LE"));
621         parseXml(createUtf32BeDataStream(data, true));
622     }
623 
624     @Test
625     @SuppressWarnings("deprecation") // unavoidable until Java 7
626     public void testReadXmlWithBOMUtf16Be() throws Exception {
627         final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-16BE\"?><X/>".getBytes(Charsets.UTF_16BE);
628         parseXml(new BOMInputStream(createUtf16BeDataStream(data, true), ByteOrderMark.UTF_16BE));
629         parseXml(createUtf16BeDataStream(data, true));
630     }
631 
632     @Test
633     @SuppressWarnings("deprecation") // unavoidable until Java 7
634     public void testReadXmlWithBOMUtf16Le() throws Exception {
635         final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-16LE\"?><X/>".getBytes(Charsets.UTF_16LE);
636         parseXml(new BOMInputStream(createUtf16LeDataStream(data, true), ByteOrderMark.UTF_16LE));
637         parseXml(createUtf16LeDataStream(data, true));
638     }
639 
640     @Test
641     public void testReadXmlWithBOMUtf32Be() throws Exception {
642         Assume.assumeTrue("JVM and SAX need to support UTF_32BE for this", jvmAndSaxBothSupportCharset("UTF_32BE"));
643         final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-32BE\"?><X/>".getBytes("UTF_32BE");
644         parseXml(new BOMInputStream(createUtf32BeDataStream(data, true), ByteOrderMark.UTF_32BE));
645         // XML parser does not know what to do with UTF-32, so we warp the input stream with a XmlStreamReader
646         parseXml(new XmlStreamReader(createUtf32BeDataStream(data, true)));
647     }
648 
649     @Test
650     public void testReadXmlWithBOMUtf32Le() throws Exception {
651         Assume.assumeTrue("JVM and SAX need to support UTF_32LE for this", jvmAndSaxBothSupportCharset("UTF_32LE"));
652         final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-32LE\"?><X/>".getBytes("UTF_32LE");
653         parseXml(new BOMInputStream(createUtf32LeDataStream(data, true), ByteOrderMark.UTF_32LE));
654         // XML parser does not know what to do with UTF-32, so we warp the input stream with a XmlStreamReader
655         parseXml(new XmlStreamReader(createUtf32LeDataStream(data, true)));
656     }
657 
658     @Test
659     public void testReadXmlWithBOMUtf8() throws Exception {
660         @SuppressWarnings("deprecation") // unavoidable until Java 7
661         final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><X/>".getBytes(Charsets.UTF_8);
662         parseXml(new BOMInputStream(createUtf8DataStream(data, true)));
663         parseXml(createUtf8DataStream(data, true));
664     }
665 
666     @Test
667     public void testReadXmlWithoutBOMUtf32Be() throws Exception {
668         Assume.assumeTrue("JVM and SAX need to support UTF_32BE for this", jvmAndSaxBothSupportCharset("UTF_32BE"));
669         final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF_32BE\"?><X/>".getBytes("UTF_32BE");
670         parseXml(new BOMInputStream(createUtf32BeDataStream(data, false)));
671         parseXml(createUtf32BeDataStream(data, false));
672     }
673 
674     @Test
675     public void testReadXmlWithoutBOMUtf32Le() throws Exception {
676         Assume.assumeTrue("JVM and SAX need to support UTF_32LE for this", jvmAndSaxBothSupportCharset("UTF_32LE"));
677         final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-32LE\"?><X/>".getBytes("UTF_32LE");
678         parseXml(new BOMInputStream(createUtf32LeDataStream(data, false)));
679         parseXml(createUtf32BeDataStream(data, false));
680     }
681 
682     @Test
683     public void testSkipWithBOM() throws Exception {
684         final byte[] data = new byte[] { 'A', 'B', 'C', 'D' };
685         final InputStream in = new BOMInputStream(createUtf8DataStream(data, true));
686         in.skip(2L);
687         assertEquals('C', in.read());
688         in.close();
689     }
690 
691     @Test
692     public void testSkipWithoutBOM() throws Exception {
693         final byte[] data = new byte[] { 'A', 'B', 'C', 'D' };
694         final InputStream in = new BOMInputStream(createUtf8DataStream(data, false));
695         in.skip(2L);
696         assertEquals('C', in.read());
697         in.close();
698     }
699 
700 
701     @Test
702     public void skipReturnValueWithBom() throws IOException {
703         byte[] baseData = new byte[]{(byte) 0x31, (byte) 0x32, (byte) 0x33};
704         BOMInputStream is1 = new BOMInputStream(createUtf8DataStream(baseData, true));
705         assertEquals(2, is1.skip(2));
706         assertEquals((byte) 0x33, is1.read());
707         is1.close();
708     }
709 
710     @Test
711     public void skipReturnValueWithoutBom() throws IOException {
712         byte[] baseData = new byte[]{(byte) 0x31, (byte) 0x32, (byte) 0x33};
713         BOMInputStream is2 = new BOMInputStream(createUtf8DataStream(baseData, false));
714         assertEquals(2, is2.skip(2)); // IO-428
715         assertEquals((byte) 0x33, is2.read());
716         is2.close();
717     }
718 
719     @Test
720     public void testSmallBufferWithBOM() throws Exception {
721         final byte[] data = new byte[] { 'A', 'B', 'C' };
722         final InputStream in = new BOMInputStream(createUtf8DataStream(data, true));
723         final byte[] buf = new byte[1024];
724         assertData(new byte[] { 'A', 'B' }, buf, in.read(buf, 0, 2));
725         assertData(new byte[] { 'C' }, buf, in.read(buf, 0, 2));
726         in.close();
727     }
728 
729     @Test
730     public void testSmallBufferWithoutBOM() throws Exception {
731         final byte[] data = new byte[] { 'A', 'B', 'C' };
732         final InputStream in = new BOMInputStream(createUtf8DataStream(data, false));
733         final byte[] buf = new byte[1024];
734         assertData(new byte[] { 'A', 'B' }, buf, in.read(buf, 0, 2));
735         assertData(new byte[] { 'C' }, buf, in.read(buf, 0, 2));
736         in.close();
737     }
738 
739     @Test
740     // make sure that our support code works as expected
741     public void testSupportCode() throws Exception {
742         final InputStream in = createUtf8DataStream(new byte[] { 'A', 'B' }, true);
743         final byte[] buf = new byte[1024];
744         final int len = in.read(buf);
745         assertEquals(5, len);
746         assertEquals(0xEF, buf[0] & 0xFF);
747         assertEquals(0xBB, buf[1] & 0xFF);
748         assertEquals(0xBF, buf[2] & 0xFF);
749         assertEquals('A', buf[3] & 0xFF);
750         assertEquals('B', buf[4] & 0xFF);
751 
752         assertData(
753                 new byte[] { (byte) 0xEF, (byte) 0xBB, (byte) 0xBF, 'A', 'B' },
754                 buf, len);
755     }
756 
757     private boolean jvmAndSaxBothSupportCharset(String charSetName) throws ParserConfigurationException, SAXException, IOException {
758         return Charset.isSupported(charSetName) &&  doesSaxSupportCharacterSet(charSetName);
759     }
760 
761     private boolean doesSaxSupportCharacterSet(String charSetName) throws ParserConfigurationException, SAXException, IOException {
762         final byte[] data = ("<?xml version=\"1.0\" encoding=\"" + charSetName + "\"?><Z/>").getBytes(charSetName);
763         final DocumentBuilder documentBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
764         try {
765             final InputSource is = new InputSource(new ByteArrayInputStream(data));
766             is.setEncoding(charSetName);
767             documentBuilder.parse(is);
768         } catch (SAXParseException e) {
769             if (e.getMessage().contains(charSetName)) return false;
770         }
771         return true;
772     }
773 }