View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.io.input;
18  
19  import static org.junit.jupiter.api.Assertions.assertEquals;
20  import static org.junit.jupiter.api.Assertions.assertFalse;
21  import static org.junit.jupiter.api.Assertions.assertNotNull;
22  import static org.junit.jupiter.api.Assertions.assertNull;
23  import static org.junit.jupiter.api.Assertions.assertThrows;
24  import static org.junit.jupiter.api.Assertions.assertTrue;
25  import static org.junit.jupiter.api.Assumptions.assumeFalse;
26  import static org.junit.jupiter.api.Assumptions.assumeTrue;
27  
28  import java.io.ByteArrayInputStream;
29  import java.io.IOException;
30  import java.io.InputStream;
31  import java.io.Reader;
32  import java.nio.charset.Charset;
33  import java.nio.charset.StandardCharsets;
34  
35  import javax.xml.parsers.DocumentBuilder;
36  import javax.xml.parsers.DocumentBuilderFactory;
37  import javax.xml.parsers.ParserConfigurationException;
38  
39  import org.apache.commons.io.ByteOrderMark;
40  import org.junit.jupiter.api.Test;
41  import org.w3c.dom.Document;
42  import org.xml.sax.InputSource;
43  import org.xml.sax.SAXException;
44  import org.xml.sax.SAXParseException;
45  
46  /**
47   * Test case for {@link BOMInputStream}.
48   */
49  @SuppressWarnings("ResultOfMethodCallIgnored")
50  public class BOMInputStreamTest {
51  
52      /**
53       *  A mock InputStream that expects {@code close()} to be called.
54       */
55      private static final class ExpectCloseInputStream extends InputStream {
56          private boolean _closeCalled;
57  
58          public void assertCloseCalled() {
59              assertTrue(_closeCalled);
60          }
61  
62          @Override
63          public void close() throws IOException {
64              _closeCalled = true;
65          }
66  
67          @Override
68          public int read() throws IOException {
69              return -1;
70          }
71      }
72  
73      private void assertData(final byte[] expected, final byte[] actual, final int len) {
74          assertEquals(expected.length, len, "length");
75          for (int ii = 0; ii < expected.length; ii++) {
76              assertEquals(expected[ii], actual[ii], "byte " + ii);
77          }
78      }
79  
80      /**
81       *  Creates the underlying data stream, with or without BOM.
82       */
83      private InputStream createUtf16BeDataStream(final byte[] baseData, final boolean addBOM) {
84          byte[] data = baseData;
85          if (addBOM) {
86              data = new byte[baseData.length + 2];
87              data[0] = (byte) 0xFE;
88              data[1] = (byte) 0xFF;
89              System.arraycopy(baseData, 0, data, 2, baseData.length);
90          }
91          return new ByteArrayInputStream(data);
92      }
93  
94      /**
95       *  Creates the underlying data stream, with or without BOM.
96       */
97      private InputStream createUtf16LeDataStream(final byte[] baseData, final boolean addBOM) {
98          byte[] data = baseData;
99          if (addBOM) {
100             data = new byte[baseData.length + 2];
101             data[0] = (byte) 0xFF;
102             data[1] = (byte) 0xFE;
103             System.arraycopy(baseData, 0, data, 2, baseData.length);
104         }
105         return new ByteArrayInputStream(data);
106     }
107 
108     /**
109      *  Creates the underlying data stream, with or without BOM.
110      */
111     private InputStream createUtf32BeDataStream(final byte[] baseData, final boolean addBOM) {
112         byte[] data = baseData;
113         if (addBOM) {
114             data = new byte[baseData.length + 4];
115             data[0] = 0;
116             data[1] = 0;
117             data[2] = (byte) 0xFE;
118             data[3] = (byte) 0xFF;
119             System.arraycopy(baseData, 0, data, 4, baseData.length);
120         }
121         return new ByteArrayInputStream(data);
122     }
123 
124     /**
125      *  Creates the underlying data stream, with or without BOM.
126      */
127     private InputStream createUtf32LeDataStream(final byte[] baseData, final boolean addBOM) {
128         byte[] data = baseData;
129         if (addBOM) {
130             data = new byte[baseData.length + 4];
131             data[0] = (byte) 0xFF;
132             data[1] = (byte) 0xFE;
133             data[2] = 0;
134             data[3] = 0;
135             System.arraycopy(baseData, 0, data, 4, baseData.length);
136         }
137         return new ByteArrayInputStream(data);
138     }
139 
140     /**
141      *  Creates the underlying data stream, with or without BOM.
142      */
143     private InputStream createUtf8Input(final byte[] baseData, final boolean addBOM) {
144         byte[] data = baseData;
145         if (addBOM) {
146             data = new byte[baseData.length + 3];
147             data[0] = (byte) 0xEF;
148             data[1] = (byte) 0xBB;
149             data[2] = (byte) 0xBF;
150             System.arraycopy(baseData, 0, data, 3, baseData.length);
151         }
152         return new ByteArrayInputStream(data);
153     }
154 
155     private boolean doesSaxSupportCharacterSet(final String charsetName) throws ParserConfigurationException, SAXException, IOException {
156         final DocumentBuilder documentBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
157         try (InputStream byteStream = CharSequenceInputStream.builder().setCharSequence("<?xml version=\"1.0\" encoding=\"" + charsetName + "\"?><Z/>")
158                 .setCharset(charsetName).get()) {
159             final InputSource is = new InputSource(byteStream);
160             is.setEncoding(charsetName);
161             documentBuilder.parse(is);
162         } catch (final SAXParseException e) {
163             if (e.getMessage().contains(charsetName)) {
164                 return false;
165             }
166         }
167         return true;
168     }
169 
170     private boolean jvmAndSaxBothSupportCharset(final String charSetName) throws ParserConfigurationException, SAXException, IOException {
171         return Charset.isSupported(charSetName) &&  doesSaxSupportCharacterSet(charSetName);
172     }
173 
174     private void parseXml(final InputStream in) throws SAXException, IOException, ParserConfigurationException {
175         final Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(new InputSource(in));
176         assertNotNull(doc);
177         assertEquals("X", doc.getFirstChild().getNodeName());
178     }
179 
180     private void parseXml(final Reader in) throws SAXException, IOException, ParserConfigurationException {
181         final Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(new InputSource(in));
182         assertNotNull(doc);
183         assertEquals("X", doc.getFirstChild().getNodeName());
184     }
185 
186     private void readBOMInputStreamTwice(final String resource) throws Exception {
187         try (InputStream inputStream = this.getClass().getResourceAsStream(resource)) {
188             assertNotNull(inputStream);
189             try (BOMInputStream bomInputStream = BOMInputStream.builder().setInputStream(inputStream).get()) {
190                 bomInputStream.mark(1_000_000);
191 
192                 this.readFile(bomInputStream);
193                 bomInputStream.reset();
194                 this.readFile(bomInputStream);
195                 inputStream.close();
196             }
197         }
198     }
199 
200     private void readFile(final BOMInputStream bomInputStream) throws Exception {
201         int bytes;
202         final byte[] bytesFromStream = new byte[100];
203         do {
204             bytes = bomInputStream.read(bytesFromStream);
205         } while (bytes > 0);
206     }
207 
208     @Test
209     public void testAvailableWithBOM() throws Exception {
210         final byte[] data = { 'A', 'B', 'C', 'D' };
211         try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
212             assertEquals(7, in.available());
213         }
214     }
215 
216     @Test
217     public void testAvailableWithoutBOM() throws Exception {
218         final byte[] data = { 'A', 'B', 'C', 'D' };
219         try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
220             assertEquals(4, in.available());
221         }
222     }
223 
224     @Test
225     public void testBuilderGet() {
226         // java.lang.IllegalStateException: origin == null
227         assertThrows(IllegalStateException.class, () -> BOMInputStream.builder().get());
228     }
229 
230     @Test
231     // this is here for coverage
232     public void testClose() throws Exception {
233         try (ExpectCloseInputStream del = new ExpectCloseInputStream()) {
234             try (InputStream in = new BOMInputStream(del)) {
235                 // nothing
236             }
237             del.assertCloseCalled();
238         }
239     }
240 
241     @Test
242     public void testEmptyBufferWithBOM() throws Exception {
243         final byte[] data = {};
244         try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
245             final byte[] buf = new byte[1024];
246             assertEquals(-1, in.read(buf));
247         }
248     }
249 
250     @Test
251     public void testEmptyBufferWithoutBOM() throws Exception {
252         final byte[] data = {};
253         try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
254             final byte[] buf = new byte[1024];
255             assertEquals(-1, in.read(buf));
256         }
257     }
258 
259     @Test
260     public void testGetBOMFirstThenRead() throws Exception {
261         final byte[] data = { 'A', 'B', 'C' };
262         try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
263             assertEquals(ByteOrderMark.UTF_8, in.getBOM(), "getBOM");
264             assertTrue(in.hasBOM(), "hasBOM()");
265             assertTrue(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
266             assertEquals('A', in.read());
267             assertEquals('B', in.read());
268             assertEquals('C', in.read());
269             assertEquals(-1, in.read());
270         }
271     }
272 
273     @Test
274     public void testGetBOMFirstThenReadInclude() throws Exception {
275         final byte[] data = { 'A', 'B', 'C' };
276         try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).setInclude(true).get()) {
277             assertTrue(in.hasBOM(), "hasBOM()");
278             assertTrue(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
279             assertEquals(ByteOrderMark.UTF_8, in.getBOM(), "getBOM");
280             assertEquals(0xEF, in.read());
281             assertEquals(0xBB, in.read());
282             assertEquals(0xBF, in.read());
283             assertEquals('A', in.read());
284             assertEquals('B', in.read());
285             assertEquals('C', in.read());
286             assertEquals(-1, in.read());
287         }
288     }
289 
290     @Test
291     public void testLargeBufferWithBOM() throws Exception {
292         final byte[] data = { 'A', 'B', 'C' };
293         try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
294             final byte[] buf = new byte[1024];
295             assertData(data, buf, in.read(buf));
296         }
297     }
298 
299     @Test
300     public void testLargeBufferWithoutBOM() throws Exception {
301         final byte[] data = { 'A', 'B', 'C' };
302         try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
303             final byte[] buf = new byte[1024];
304             assertData(data, buf, in.read(buf));
305         }
306     }
307 
308     @Test
309     public void testLeadingNonBOMBufferedRead() throws Exception {
310         final byte[] data = { (byte) 0xEF, (byte) 0xAB, (byte) 0xCD };
311         try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
312             final byte[] buf = new byte[1024];
313             assertData(data, buf, in.read(buf));
314         }
315     }
316 
317     @Test
318     public void testLeadingNonBOMSingleRead() throws Exception {
319         final byte[] data = { (byte) 0xEF, (byte) 0xAB, (byte) 0xCD };
320         try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
321             assertEquals(0xEF, in.read());
322             assertEquals(0xAB, in.read());
323             assertEquals(0xCD, in.read());
324             assertEquals(-1, in.read());
325         }
326     }
327 
328     @Test
329     public void testMarkResetAfterReadWithBOM() throws Exception {
330         final byte[] data = { 'A', 'B', 'C', 'D' };
331         try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
332             assertTrue(in.markSupported());
333 
334             in.read();
335             in.mark(10);
336 
337             in.read();
338             in.read();
339             in.reset();
340             assertEquals('B', in.read());
341         }
342     }
343 
344     @Test
345     public void testMarkResetAfterReadWithoutBOM() throws Exception {
346         final byte[] data = { 'A', 'B', 'C', 'D' };
347         try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
348             assertTrue(in.markSupported());
349 
350             in.read();
351             in.mark(10);
352 
353             in.read();
354             in.read();
355             in.reset();
356             assertEquals('B', in.read());
357         }
358     }
359 
360     @Test
361     public void testMarkResetBeforeReadWithBOM() throws Exception {
362         final byte[] data = { 'A', 'B', 'C', 'D' };
363         try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
364             assertTrue(in.markSupported());
365 
366             in.mark(10);
367 
368             in.read();
369             in.read();
370             in.reset();
371             assertEquals('A', in.read());
372         }
373     }
374 
375     @Test
376     public void testMarkResetBeforeReadWithoutBOM() throws Exception {
377         final byte[] data = { 'A', 'B', 'C', 'D' };
378         try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
379             assertTrue(in.markSupported());
380 
381             in.mark(10);
382 
383             in.read();
384             in.read();
385             in.reset();
386             assertEquals('A', in.read());
387         }
388     }
389 
390     @Test
391     public void testNoBoms() throws Exception {
392         final byte[] data = { 'A', 'B', 'C' };
393         assertThrows(IllegalArgumentException.class, () -> new BOMInputStream(createUtf8Input(data, true), false, (ByteOrderMark[]) null).close());
394         assertThrows(IllegalArgumentException.class, () -> new BOMInputStream(createUtf8Input(data, true), false, new ByteOrderMark[0]).close());
395         //
396         try (final BOMInputStream bomInputStream = BOMInputStream.builder()
397                 .setInputStream(createUtf8Input(data, true))
398                 .setInclude(true)
399                 .setByteOrderMarks((ByteOrderMark[]) null)
400                 .get()) {
401             assertEquals(BOMInputStream.Builder.getDefaultByteOrderMark(), bomInputStream.getBOM());
402         }
403         assertThrows(IllegalArgumentException.class, () -> BOMInputStream.builder()
404                 .setInputStream(createUtf8Input(data, true))
405                 .setInclude(true)
406                 .setByteOrderMarks()
407                 .get()
408                 .close());
409     }
410 
411     @Test
412     public void testReadEmpty() throws Exception {
413         final byte[] data = {};
414         try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
415             assertEquals(-1, in.read());
416             assertFalse(in.hasBOM(), "hasBOM()");
417             assertFalse(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
418             assertNull(in.getBOM(), "getBOM");
419         }
420     }
421 
422     @Test
423     public void testReadSmall() throws Exception {
424         final byte[] data = { 'A', 'B' };
425         try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
426             assertEquals('A', in.read());
427             assertEquals('B', in.read());
428             assertEquals(-1, in.read());
429             assertFalse(in.hasBOM(), "hasBOM()");
430             assertFalse(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
431             assertNull(in.getBOM(), "getBOM");
432         }
433     }
434 
435     @Test
436     public void testReadTwiceWithBOM() throws Exception {
437         this.readBOMInputStreamTwice("/org/apache/commons/io/testfileBOM.xml");
438     }
439 
440     @Test
441     public void testReadTwiceWithoutBOM() throws Exception {
442         this.readBOMInputStreamTwice("/org/apache/commons/io/testfileNoBOM.xml");
443     }
444 
445     @Test
446     public void testReadWithBOMInclude() throws Exception {
447         final byte[] data = { 'A', 'B', 'C' };
448         try (BOMInputStream in = new BOMInputStream(createUtf8Input(data, true), true)) {
449             assertEquals(0xEF, in.read());
450             assertEquals(0xBB, in.read());
451             assertEquals(0xBF, in.read());
452             assertEquals('A', in.read());
453             assertEquals('B', in.read());
454             assertEquals('C', in.read());
455             assertEquals(-1, in.read());
456             assertTrue(in.hasBOM(), "hasBOM()");
457             assertTrue(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
458             assertEquals(ByteOrderMark.UTF_8, in.getBOM(), "getBOM");
459         }
460     }
461 
462     @Test
463     public void testReadWithBOMUtf16Be() throws Exception {
464         final byte[] data = "ABC".getBytes(StandardCharsets.UTF_16BE);
465         try (BOMInputStream in = new BOMInputStream(createUtf16BeDataStream(data, true), ByteOrderMark.UTF_16BE)) {
466             assertEquals(0, in.read());
467             assertEquals('A', in.read());
468             assertEquals(0, in.read());
469             assertEquals('B', in.read());
470             assertEquals(0, in.read());
471             assertEquals('C', in.read());
472             assertEquals(-1, in.read());
473             assertTrue(in.hasBOM(), "hasBOM()");
474             assertTrue(in.hasBOM(ByteOrderMark.UTF_16BE), "hasBOM(UTF-16BE)");
475             assertEquals(ByteOrderMark.UTF_16BE, in.getBOM(), "getBOM");
476             assertThrows(IllegalArgumentException.class, () -> in.hasBOM(ByteOrderMark.UTF_16LE));
477         }
478     }
479 
480     @Test
481     public void testReadWithBOMUtf16Le() throws Exception {
482         final byte[] data = "ABC".getBytes(StandardCharsets.UTF_16LE);
483         try (BOMInputStream in = new BOMInputStream(createUtf16LeDataStream(data, true), ByteOrderMark.UTF_16LE)) {
484             assertEquals('A', in.read());
485             assertEquals(0, in.read());
486             assertEquals('B', in.read());
487             assertEquals(0, in.read());
488             assertEquals('C', in.read());
489             assertEquals(0, in.read());
490             assertEquals(-1, in.read());
491             assertTrue(in.hasBOM(), "hasBOM()");
492             assertTrue(in.hasBOM(ByteOrderMark.UTF_16LE), "hasBOM(UTF-16LE)");
493             assertEquals(ByteOrderMark.UTF_16LE, in.getBOM(), "getBOM");
494             assertThrows(IllegalArgumentException.class, () -> in.hasBOM(ByteOrderMark.UTF_16BE));
495         }
496     }
497 
498     @Test
499     public void testReadWithBOMUtf32Be() throws Exception {
500         assumeTrue(Charset.isSupported("UTF_32BE"));
501         final byte[] data = "ABC".getBytes("UTF_32BE");
502         try (BOMInputStream in = new BOMInputStream(createUtf32BeDataStream(data, true),
503                 ByteOrderMark.UTF_32BE)) {
504             assertEquals(0, in.read());
505             assertEquals(0, in.read());
506             assertEquals(0, in.read());
507             assertEquals('A', in.read());
508             assertEquals(0, in.read());
509             assertEquals(0, in.read());
510             assertEquals(0, in.read());
511             assertEquals('B', in.read());
512             assertEquals(0, in.read());
513             assertEquals(0, in.read());
514             assertEquals(0, in.read());
515             assertEquals('C', in.read());
516             assertEquals(-1, in.read());
517             assertTrue(in.hasBOM(), "hasBOM()");
518             assertTrue(in.hasBOM(ByteOrderMark.UTF_32BE), "hasBOM(UTF-32BE)");
519             assertEquals(ByteOrderMark.UTF_32BE, in.getBOM(), "getBOM");
520             assertThrows(IllegalArgumentException.class, () -> in.hasBOM(ByteOrderMark.UTF_32LE));
521         }
522     }
523 
524     @Test
525     public void testReadWithBOMUtf32Le() throws Exception {
526         assumeTrue(Charset.isSupported("UTF_32LE"));
527         final byte[] data = "ABC".getBytes("UTF_32LE");
528         try (BOMInputStream in = new BOMInputStream(createUtf32LeDataStream(data, true),
529                 ByteOrderMark.UTF_32LE)) {
530             assertEquals('A', in.read());
531             assertEquals(0, in.read());
532             assertEquals(0, in.read());
533             assertEquals(0, in.read());
534             assertEquals('B', in.read());
535             assertEquals(0, in.read());
536             assertEquals(0, in.read());
537             assertEquals(0, in.read());
538             assertEquals('C', in.read());
539             assertEquals(0, in.read());
540             assertEquals(0, in.read());
541             assertEquals(0, in.read());
542             assertEquals(-1, in.read());
543             assertTrue(in.hasBOM(), "hasBOM()");
544             assertTrue(in.hasBOM(ByteOrderMark.UTF_32LE), "hasBOM(UTF-32LE)");
545             assertEquals(ByteOrderMark.UTF_32LE, in.getBOM(), "getBOM");
546             assertThrows(IllegalArgumentException.class, () -> in.hasBOM(ByteOrderMark.UTF_32BE));
547         }
548     }
549 
550     @Test
551     public void testReadWithBOMUtf8() throws Exception {
552         final byte[] data = "ABC".getBytes(StandardCharsets.UTF_8);
553         try (BOMInputStream in = new BOMInputStream(createUtf8Input(data, true), ByteOrderMark.UTF_8)) {
554             assertEquals('A', in.read());
555             assertEquals('B', in.read());
556             assertEquals('C', in.read());
557             assertEquals(-1, in.read());
558             assertTrue(in.hasBOM(), "hasBOM()");
559             assertTrue(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
560             assertEquals(ByteOrderMark.UTF_8, in.getBOM(), "getBOM");
561             assertThrows(IllegalArgumentException.class, () -> in.hasBOM(ByteOrderMark.UTF_16BE));
562         }
563     }
564 
565     @Test
566     public void testReadWithMultipleBOM() throws Exception {
567         final byte[] data = { 'A', 'B', 'C' };
568         try (BOMInputStream in = new BOMInputStream(createUtf8Input(data, true), ByteOrderMark.UTF_16BE,
569                 ByteOrderMark.UTF_8)) {
570             assertEquals('A', in.read());
571             assertEquals('B', in.read());
572             assertEquals('C', in.read());
573             assertEquals(-1, in.read());
574             assertTrue(in.hasBOM(), "hasBOM()");
575             assertTrue(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
576             assertFalse(in.hasBOM(ByteOrderMark.UTF_16BE), "hasBOM(UTF-16BE)");
577             assertEquals(ByteOrderMark.UTF_8, in.getBOM(), "getBOM");
578         }
579     }
580 
581     @Test
582     public void testReadWithoutBOM() throws Exception {
583         final byte[] data = { 'A', 'B', 'C' };
584         try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
585             assertEquals('A', in.read());
586             assertEquals('B', in.read());
587             assertEquals('C', in.read());
588             assertEquals(-1, in.read());
589             assertFalse(in.hasBOM(), "hasBOM()");
590             assertFalse(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
591             assertNull(in.getBOM(), "getBOM");
592         }
593     }
594 
595     @Test
596     public void testReadXmlWithBOMUcs2() throws Exception {
597         assumeFalse(System.getProperty("java.vendor").contains("IBM"), "This test does not pass on some IBM VMs xml parsers");
598 
599         // UCS-2 is BE.
600         assumeTrue(Charset.isSupported("ISO-10646-UCS-2"));
601         final byte[] data = "<?xml version=\"1.0\" encoding=\"ISO-10646-UCS-2\"?><X/>".getBytes("ISO-10646-UCS-2");
602         try (BOMInputStream in = new BOMInputStream(createUtf16BeDataStream(data, true), ByteOrderMark.UTF_16BE)) {
603             parseXml(in);
604         }
605         parseXml(createUtf16BeDataStream(data, true));
606     }
607 
608     @Test
609     public void testReadXmlWithBOMUcs4() throws Exception {
610         // UCS-4 is BE or LE?
611         // Hm: ISO-10646-UCS-4 is not supported on Oracle 1.6.0_31
612         assumeTrue(Charset.isSupported("ISO-10646-UCS-4"));
613         final byte[] data = "<?xml version=\"1.0\" encoding=\"ISO-10646-UCS-4\"?><X/>".getBytes("ISO-10646-UCS-4");
614         // XML parser does not know what to do with UTF-32
615         try (BOMInputStream in = new BOMInputStream(createUtf32BeDataStream(data, true), ByteOrderMark.UTF_32BE)) {
616             parseXml(in);
617             // XML parser does not know what to do with UTF-32
618             assumeTrue(jvmAndSaxBothSupportCharset("UTF_32LE"), "JVM and SAX need to support UTF_32LE for this");
619         }
620         parseXml(createUtf32BeDataStream(data, true));
621     }
622 
623     @Test
624     public void testReadXmlWithBOMUtf16Be() throws Exception {
625         final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-16BE\"?><X/>".getBytes(StandardCharsets.UTF_16BE);
626         try (BOMInputStream in = new BOMInputStream(createUtf16BeDataStream(data, true), ByteOrderMark.UTF_16BE)) {
627             parseXml(in);
628         }
629         parseXml(createUtf16BeDataStream(data, true));
630     }
631 
632     @Test
633     public void testReadXmlWithBOMUtf16Le() throws Exception {
634         final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-16LE\"?><X/>".getBytes(StandardCharsets.UTF_16LE);
635         try (BOMInputStream in = new BOMInputStream(createUtf16LeDataStream(data, true), ByteOrderMark.UTF_16LE)) {
636             parseXml(in);
637         }
638         parseXml(createUtf16LeDataStream(data, true));
639     }
640 
641     @Test
642     public void testReadXmlWithBOMUtf32Be() throws Exception {
643         assumeTrue(jvmAndSaxBothSupportCharset("UTF_32BE"), "JVM and SAX need to support UTF_32BE for this");
644         final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-32BE\"?><X/>".getBytes("UTF_32BE");
645         try (BOMInputStream in = new BOMInputStream(createUtf32BeDataStream(data, true), ByteOrderMark.UTF_32BE)) {
646             parseXml(in);
647         }
648         // XML parser does not know what to do with UTF-32, so we warp the input stream with a XmlStreamReader
649         try (XmlStreamReader in = new XmlStreamReader(createUtf32BeDataStream(data, true))) {
650             parseXml(in);
651         }
652     }
653 
654     @Test
655     public void testReadXmlWithBOMUtf32Le() throws Exception {
656         assumeTrue(jvmAndSaxBothSupportCharset("UTF_32LE"), "JVM and SAX need to support UTF_32LE for this");
657         final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-32LE\"?><X/>".getBytes("UTF_32LE");
658         try (BOMInputStream in = new BOMInputStream(createUtf32LeDataStream(data, true), ByteOrderMark.UTF_32LE)) {
659             parseXml(in);
660         }
661         // XML parser does not know what to do with UTF-32, so we warp the input stream with a XmlStreamReader
662         try (XmlStreamReader in = new XmlStreamReader(createUtf32LeDataStream(data, true))) {
663             parseXml(in);
664         }
665     }
666 
667     @Test
668     public void testReadXmlWithBOMUtf8() throws Exception {
669         final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><X/>".getBytes(StandardCharsets.UTF_8);
670         try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
671             parseXml(in);
672         }
673         parseXml(createUtf8Input(data, true));
674     }
675 
676     @Test
677     public void testReadXmlWithoutBOMUtf32Be() throws Exception {
678         assumeTrue(jvmAndSaxBothSupportCharset("UTF_32BE"), "JVM and SAX need to support UTF_32BE for this");
679         final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF_32BE\"?><X/>".getBytes("UTF_32BE");
680         try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
681             parseXml(in);
682         }
683         parseXml(createUtf32BeDataStream(data, false));
684     }
685 
686     @Test
687     public void testReadXmlWithoutBOMUtf32Le() throws Exception {
688         assumeTrue(jvmAndSaxBothSupportCharset("UTF_32LE"), "JVM and SAX need to support UTF_32LE for this");
689         final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-32LE\"?><X/>".getBytes("UTF_32LE");
690         try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
691             parseXml(in);
692         }
693         parseXml(createUtf32BeDataStream(data, false));
694     }
695 
696     @Test
697     public void testSkipReturnValueWithBom() throws IOException {
698         final byte[] data = { (byte) 0x31, (byte) 0x32, (byte) 0x33 };
699         try (BOMInputStream is1 = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
700             assertEquals(2, is1.skip(2));
701             assertEquals((byte) 0x33, is1.read());
702         }
703     }
704 
705     @Test
706     public void testSkipReturnValueWithoutBom() throws IOException {
707         final byte[] data = { (byte) 0x31, (byte) 0x32, (byte) 0x33 };
708         try (BOMInputStream is2 = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
709             assertEquals(2, is2.skip(2)); // IO-428
710             assertEquals((byte) 0x33, is2.read());
711         }
712     }
713 
714     @Test
715     public void testSkipWithBOM() throws Exception {
716         final byte[] data = { 'A', 'B', 'C', 'D' };
717         try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
718             in.skip(2L);
719             assertEquals('C', in.read());
720         }
721     }
722 
723     @Test
724     public void testSkipWithoutBOM() throws Exception {
725         final byte[] data = { 'A', 'B', 'C', 'D' };
726         try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
727             in.skip(2L);
728             assertEquals('C', in.read());
729         }
730     }
731 
732     @Test
733     public void testSmallBufferWithBOM() throws Exception {
734         final byte[] data = { 'A', 'B', 'C' };
735         try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
736             final byte[] buf = new byte[1024];
737             assertData(new byte[] { 'A', 'B' }, buf, in.read(buf, 0, 2));
738             assertData(new byte[] { 'C' }, buf, in.read(buf, 0, 2));
739         }
740     }
741 
742     @Test
743     public void testSmallBufferWithoutBOM() throws Exception {
744         final byte[] data = { 'A', 'B', 'C' };
745         try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
746             final byte[] buf = new byte[1024];
747             assertData(new byte[] { 'A', 'B' }, buf, in.read(buf, 0, 2));
748             assertData(new byte[] { 'C' }, buf, in.read(buf, 0, 2));
749         }
750     }
751 
752     @Test
753     // make sure that our support code works as expected
754     public void testSupportCode() throws Exception {
755         try (InputStream in = createUtf8Input(new byte[] { 'A', 'B' }, true)) {
756             final byte[] buf = new byte[1024];
757             final int len = in.read(buf);
758             assertEquals(5, len);
759             assertEquals(0xEF, buf[0] & 0xFF);
760             assertEquals(0xBB, buf[1] & 0xFF);
761             assertEquals(0xBF, buf[2] & 0xFF);
762             assertEquals('A', buf[3] & 0xFF);
763             assertEquals('B', buf[4] & 0xFF);
764 
765             assertData(new byte[] { (byte) 0xEF, (byte) 0xBB, (byte) 0xBF, 'A', 'B' }, buf, len);
766         }
767     }
768 }