View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.io.input;
18  
19  import static org.junit.jupiter.api.Assertions.assertEquals;
20  import static org.junit.jupiter.api.Assertions.assertFalse;
21  import static org.junit.jupiter.api.Assertions.assertNotNull;
22  import static org.junit.jupiter.api.Assertions.assertNull;
23  import static org.junit.jupiter.api.Assertions.assertThrows;
24  import static org.junit.jupiter.api.Assertions.assertTrue;
25  import static org.junit.jupiter.api.Assumptions.assumeFalse;
26  import static org.junit.jupiter.api.Assumptions.assumeTrue;
27  
28  import java.io.ByteArrayInputStream;
29  import java.io.IOException;
30  import java.io.InputStream;
31  import java.io.Reader;
32  import java.nio.charset.Charset;
33  import java.nio.charset.StandardCharsets;
34  
35  import javax.xml.parsers.DocumentBuilder;
36  import javax.xml.parsers.DocumentBuilderFactory;
37  import javax.xml.parsers.ParserConfigurationException;
38  
39  import org.apache.commons.io.ByteOrderMark;
40  import org.junit.jupiter.api.Test;
41  import org.w3c.dom.Document;
42  import org.xml.sax.InputSource;
43  import org.xml.sax.SAXException;
44  import org.xml.sax.SAXParseException;
45  
46  /**
47   * Test case for {@link BOMInputStream}.
48   */
49  @SuppressWarnings("ResultOfMethodCallIgnored")
50  public class BOMInputStreamTest {
51  
52      /**
53       *  A mock InputStream that expects {@code close()} to be called.
54       */
55      private static final class ExpectCloseInputStream extends InputStream {
56          private boolean _closeCalled;
57  
58          public void assertCloseCalled() {
59              assertTrue(_closeCalled);
60          }
61  
62          @Override
63          public void close() throws IOException {
64              _closeCalled = true;
65          }
66  
67          @Override
68          public int read() throws IOException {
69              return -1;
70          }
71      }
72  
73      private void assertData(final byte[] expected, final byte[] actual, final int len) {
74          assertEquals(expected.length, len, "length");
75          for (int ii = 0; ii < expected.length; ii++) {
76              assertEquals(expected[ii], actual[ii], "byte " + ii);
77          }
78      }
79  
80      /**
81       *  Creates the underlying data stream, with or without BOM.
82       */
83      private InputStream createUtf16BeDataStream(final byte[] baseData, final boolean addBOM) {
84          byte[] data = baseData;
85          if (addBOM) {
86              data = new byte[baseData.length + 2];
87              data[0] = (byte) 0xFE;
88              data[1] = (byte) 0xFF;
89              System.arraycopy(baseData, 0, data, 2, baseData.length);
90          }
91          return new ByteArrayInputStream(data);
92      }
93  
94      /**
95       *  Creates the underlying data stream, with or without BOM.
96       */
97      private InputStream createUtf16LeDataStream(final byte[] baseData, final boolean addBOM) {
98          byte[] data = baseData;
99          if (addBOM) {
100             data = new byte[baseData.length + 2];
101             data[0] = (byte) 0xFF;
102             data[1] = (byte) 0xFE;
103             System.arraycopy(baseData, 0, data, 2, baseData.length);
104         }
105         return new ByteArrayInputStream(data);
106     }
107 
108     /**
109      *  Creates the underlying data stream, with or without BOM.
110      */
111     private InputStream createUtf32BeDataStream(final byte[] baseData, final boolean addBOM) {
112         byte[] data = baseData;
113         if (addBOM) {
114             data = new byte[baseData.length + 4];
115             data[0] = 0;
116             data[1] = 0;
117             data[2] = (byte) 0xFE;
118             data[3] = (byte) 0xFF;
119             System.arraycopy(baseData, 0, data, 4, baseData.length);
120         }
121         return new ByteArrayInputStream(data);
122     }
123 
124     /**
125      *  Creates the underlying data stream, with or without BOM.
126      */
127     private InputStream createUtf32LeDataStream(final byte[] baseData, final boolean addBOM) {
128         byte[] data = baseData;
129         if (addBOM) {
130             data = new byte[baseData.length + 4];
131             data[0] = (byte) 0xFF;
132             data[1] = (byte) 0xFE;
133             data[2] = 0;
134             data[3] = 0;
135             System.arraycopy(baseData, 0, data, 4, baseData.length);
136         }
137         return new ByteArrayInputStream(data);
138     }
139 
140     /**
141      *  Creates the underlying data stream, with or without BOM.
142      */
143     private InputStream createUtf8Input(final byte[] baseData, final boolean addBOM) {
144         byte[] data = baseData;
145         if (addBOM) {
146             data = new byte[baseData.length + 3];
147             data[0] = (byte) 0xEF;
148             data[1] = (byte) 0xBB;
149             data[2] = (byte) 0xBF;
150             System.arraycopy(baseData, 0, data, 3, baseData.length);
151         }
152         return new ByteArrayInputStream(data);
153     }
154 
155     private boolean doesSaxSupportCharacterSet(final String charsetName) throws ParserConfigurationException, SAXException, IOException {
156         final DocumentBuilder documentBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
157         try (InputStream byteStream = CharSequenceInputStream.builder().setCharSequence("<?xml version=\"1.0\" encoding=\"" + charsetName + "\"?><Z/>")
158                 .setCharset(charsetName).get()) {
159             final InputSource is = new InputSource(byteStream);
160             is.setEncoding(charsetName);
161             documentBuilder.parse(is);
162         } catch (final SAXParseException e) {
163             if (e.getMessage().contains(charsetName)) {
164                 return false;
165             }
166         }
167         return true;
168     }
169 
170     private boolean jvmAndSaxBothSupportCharset(final String charSetName) throws ParserConfigurationException, SAXException, IOException {
171         return Charset.isSupported(charSetName) &&  doesSaxSupportCharacterSet(charSetName);
172     }
173 
174     private void parseXml(final InputStream in) throws SAXException, IOException, ParserConfigurationException {
175         final Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(new InputSource(in));
176         assertNotNull(doc);
177         assertEquals("X", doc.getFirstChild().getNodeName());
178     }
179 
180     private void parseXml(final Reader in) throws SAXException, IOException, ParserConfigurationException {
181         final Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(new InputSource(in));
182         assertNotNull(doc);
183         assertEquals("X", doc.getFirstChild().getNodeName());
184     }
185 
186     private void readBOMInputStreamTwice(final String resource) throws Exception {
187         try (InputStream inputStream = this.getClass().getResourceAsStream(resource)) {
188             assertNotNull(inputStream);
189             try (BOMInputStream bomInputStream = BOMInputStream.builder().setInputStream(inputStream).get()) {
190                 bomInputStream.mark(1_000_000);
191 
192                 this.readFile(bomInputStream);
193                 bomInputStream.reset();
194                 this.readFile(bomInputStream);
195                 inputStream.close();
196             }
197         }
198     }
199 
200     private void readFile(final BOMInputStream bomInputStream) throws Exception {
201         int bytes;
202         final byte[] bytesFromStream = new byte[100];
203         do {
204             bytes = bomInputStream.read(bytesFromStream);
205         } while (bytes > 0);
206     }
207 
208     @Test
209     public void testAvailableWithBOM() throws Exception {
210         final byte[] data = { 'A', 'B', 'C', 'D' };
211         try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
212             assertEquals(7, in.available());
213         }
214     }
215 
216     @Test
217     public void testAvailableWithoutBOM() throws Exception {
218         final byte[] data = { 'A', 'B', 'C', 'D' };
219         try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
220             assertEquals(4, in.available());
221         }
222     }
223 
224     @Test
225     // this is here for coverage
226     public void testClose() throws Exception {
227         try (ExpectCloseInputStream del = new ExpectCloseInputStream()) {
228             try (InputStream in = new BOMInputStream(del)) {
229                 // nothing
230             }
231             del.assertCloseCalled();
232         }
233     }
234 
235     @Test
236     public void testEmptyBufferWithBOM() throws Exception {
237         final byte[] data = {};
238         try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
239             final byte[] buf = new byte[1024];
240             assertEquals(-1, in.read(buf));
241         }
242     }
243 
244     @Test
245     public void testEmptyBufferWithoutBOM() throws Exception {
246         final byte[] data = {};
247         try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
248             final byte[] buf = new byte[1024];
249             assertEquals(-1, in.read(buf));
250         }
251     }
252 
253     @Test
254     public void testGetBOMFirstThenRead() throws Exception {
255         final byte[] data = { 'A', 'B', 'C' };
256         try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
257             assertEquals(ByteOrderMark.UTF_8, in.getBOM(), "getBOM");
258             assertTrue(in.hasBOM(), "hasBOM()");
259             assertTrue(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
260             assertEquals('A', in.read());
261             assertEquals('B', in.read());
262             assertEquals('C', in.read());
263             assertEquals(-1, in.read());
264         }
265     }
266 
267     @Test
268     public void testGetBOMFirstThenReadInclude() throws Exception {
269         final byte[] data = { 'A', 'B', 'C' };
270         try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).setInclude(true).get()) {
271             assertTrue(in.hasBOM(), "hasBOM()");
272             assertTrue(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
273             assertEquals(ByteOrderMark.UTF_8, in.getBOM(), "getBOM");
274             assertEquals(0xEF, in.read());
275             assertEquals(0xBB, in.read());
276             assertEquals(0xBF, in.read());
277             assertEquals('A', in.read());
278             assertEquals('B', in.read());
279             assertEquals('C', in.read());
280             assertEquals(-1, in.read());
281         }
282     }
283 
284     @Test
285     public void testLargeBufferWithBOM() throws Exception {
286         final byte[] data = { 'A', 'B', 'C' };
287         try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
288             final byte[] buf = new byte[1024];
289             assertData(data, buf, in.read(buf));
290         }
291     }
292 
293     @Test
294     public void testLargeBufferWithoutBOM() throws Exception {
295         final byte[] data = { 'A', 'B', 'C' };
296         try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
297             final byte[] buf = new byte[1024];
298             assertData(data, buf, in.read(buf));
299         }
300     }
301 
302     @Test
303     public void testLeadingNonBOMBufferedRead() throws Exception {
304         final byte[] data = { (byte) 0xEF, (byte) 0xAB, (byte) 0xCD };
305         try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
306             final byte[] buf = new byte[1024];
307             assertData(data, buf, in.read(buf));
308         }
309     }
310 
311     @Test
312     public void testLeadingNonBOMSingleRead() throws Exception {
313         final byte[] data = { (byte) 0xEF, (byte) 0xAB, (byte) 0xCD };
314         try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
315             assertEquals(0xEF, in.read());
316             assertEquals(0xAB, in.read());
317             assertEquals(0xCD, in.read());
318             assertEquals(-1, in.read());
319         }
320     }
321 
322     @Test
323     public void testMarkResetAfterReadWithBOM() throws Exception {
324         final byte[] data = { 'A', 'B', 'C', 'D' };
325         try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
326             assertTrue(in.markSupported());
327 
328             in.read();
329             in.mark(10);
330 
331             in.read();
332             in.read();
333             in.reset();
334             assertEquals('B', in.read());
335         }
336     }
337 
338     @Test
339     public void testMarkResetAfterReadWithoutBOM() throws Exception {
340         final byte[] data = { 'A', 'B', 'C', 'D' };
341         try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
342             assertTrue(in.markSupported());
343 
344             in.read();
345             in.mark(10);
346 
347             in.read();
348             in.read();
349             in.reset();
350             assertEquals('B', in.read());
351         }
352     }
353 
354     @Test
355     public void testMarkResetBeforeReadWithBOM() throws Exception {
356         final byte[] data = { 'A', 'B', 'C', 'D' };
357         try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
358             assertTrue(in.markSupported());
359 
360             in.mark(10);
361 
362             in.read();
363             in.read();
364             in.reset();
365             assertEquals('A', in.read());
366         }
367     }
368 
369     @Test
370     public void testMarkResetBeforeReadWithoutBOM() throws Exception {
371         final byte[] data = { 'A', 'B', 'C', 'D' };
372         try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
373             assertTrue(in.markSupported());
374 
375             in.mark(10);
376 
377             in.read();
378             in.read();
379             in.reset();
380             assertEquals('A', in.read());
381         }
382     }
383 
384     @Test
385     public void testNoBoms() throws Exception {
386         final byte[] data = { 'A', 'B', 'C' };
387         assertThrows(IllegalArgumentException.class, () -> new BOMInputStream(createUtf8Input(data, true), false, (ByteOrderMark[]) null).close());
388         assertThrows(IllegalArgumentException.class, () -> new BOMInputStream(createUtf8Input(data, true), false, new ByteOrderMark[0]).close());
389         //
390         try (final BOMInputStream bomInputStream = BOMInputStream.builder()
391                 .setInputStream(createUtf8Input(data, true))
392                 .setInclude(true)
393                 .setByteOrderMarks((ByteOrderMark[]) null)
394                 .get()) {
395             assertEquals(BOMInputStream.Builder.getDefaultByteOrderMark(), bomInputStream.getBOM());
396         }
397         assertThrows(IllegalArgumentException.class, () -> BOMInputStream.builder()
398                 .setInputStream(createUtf8Input(data, true))
399                 .setInclude(true)
400                 .setByteOrderMarks()
401                 .get()
402                 .close());
403     }
404 
405     @Test
406     public void testReadEmpty() throws Exception {
407         final byte[] data = {};
408         try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
409             assertEquals(-1, in.read());
410             assertFalse(in.hasBOM(), "hasBOM()");
411             assertFalse(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
412             assertNull(in.getBOM(), "getBOM");
413         }
414     }
415 
416     @Test
417     public void testReadSmall() throws Exception {
418         final byte[] data = { 'A', 'B' };
419         try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
420             assertEquals('A', in.read());
421             assertEquals('B', in.read());
422             assertEquals(-1, in.read());
423             assertFalse(in.hasBOM(), "hasBOM()");
424             assertFalse(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
425             assertNull(in.getBOM(), "getBOM");
426         }
427     }
428 
429     @Test
430     public void testReadTwiceWithBOM() throws Exception {
431         this.readBOMInputStreamTwice("/org/apache/commons/io/testfileBOM.xml");
432     }
433 
434     @Test
435     public void testReadTwiceWithoutBOM() throws Exception {
436         this.readBOMInputStreamTwice("/org/apache/commons/io/testfileNoBOM.xml");
437     }
438 
439     @Test
440     public void testReadWithBOMInclude() throws Exception {
441         final byte[] data = { 'A', 'B', 'C' };
442         try (BOMInputStream in = new BOMInputStream(createUtf8Input(data, true), true)) {
443             assertEquals(0xEF, in.read());
444             assertEquals(0xBB, in.read());
445             assertEquals(0xBF, in.read());
446             assertEquals('A', in.read());
447             assertEquals('B', in.read());
448             assertEquals('C', in.read());
449             assertEquals(-1, in.read());
450             assertTrue(in.hasBOM(), "hasBOM()");
451             assertTrue(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
452             assertEquals(ByteOrderMark.UTF_8, in.getBOM(), "getBOM");
453         }
454     }
455 
456     @Test
457     public void testReadWithBOMUtf16Be() throws Exception {
458         final byte[] data = "ABC".getBytes(StandardCharsets.UTF_16BE);
459         try (BOMInputStream in = new BOMInputStream(createUtf16BeDataStream(data, true), ByteOrderMark.UTF_16BE)) {
460             assertEquals(0, in.read());
461             assertEquals('A', in.read());
462             assertEquals(0, in.read());
463             assertEquals('B', in.read());
464             assertEquals(0, in.read());
465             assertEquals('C', in.read());
466             assertEquals(-1, in.read());
467             assertTrue(in.hasBOM(), "hasBOM()");
468             assertTrue(in.hasBOM(ByteOrderMark.UTF_16BE), "hasBOM(UTF-16BE)");
469             assertEquals(ByteOrderMark.UTF_16BE, in.getBOM(), "getBOM");
470             assertThrows(IllegalArgumentException.class, () -> in.hasBOM(ByteOrderMark.UTF_16LE));
471         }
472     }
473 
474     @Test
475     public void testReadWithBOMUtf16Le() throws Exception {
476         final byte[] data = "ABC".getBytes(StandardCharsets.UTF_16LE);
477         try (BOMInputStream in = new BOMInputStream(createUtf16LeDataStream(data, true), ByteOrderMark.UTF_16LE)) {
478             assertEquals('A', in.read());
479             assertEquals(0, in.read());
480             assertEquals('B', in.read());
481             assertEquals(0, in.read());
482             assertEquals('C', in.read());
483             assertEquals(0, in.read());
484             assertEquals(-1, in.read());
485             assertTrue(in.hasBOM(), "hasBOM()");
486             assertTrue(in.hasBOM(ByteOrderMark.UTF_16LE), "hasBOM(UTF-16LE)");
487             assertEquals(ByteOrderMark.UTF_16LE, in.getBOM(), "getBOM");
488             assertThrows(IllegalArgumentException.class, () -> in.hasBOM(ByteOrderMark.UTF_16BE));
489         }
490     }
491 
492     @Test
493     public void testReadWithBOMUtf32Be() throws Exception {
494         assumeTrue(Charset.isSupported("UTF_32BE"));
495         final byte[] data = "ABC".getBytes("UTF_32BE");
496         try (BOMInputStream in = new BOMInputStream(createUtf32BeDataStream(data, true),
497                 ByteOrderMark.UTF_32BE)) {
498             assertEquals(0, in.read());
499             assertEquals(0, in.read());
500             assertEquals(0, in.read());
501             assertEquals('A', in.read());
502             assertEquals(0, in.read());
503             assertEquals(0, in.read());
504             assertEquals(0, in.read());
505             assertEquals('B', in.read());
506             assertEquals(0, in.read());
507             assertEquals(0, in.read());
508             assertEquals(0, in.read());
509             assertEquals('C', in.read());
510             assertEquals(-1, in.read());
511             assertTrue(in.hasBOM(), "hasBOM()");
512             assertTrue(in.hasBOM(ByteOrderMark.UTF_32BE), "hasBOM(UTF-32BE)");
513             assertEquals(ByteOrderMark.UTF_32BE, in.getBOM(), "getBOM");
514             assertThrows(IllegalArgumentException.class, () -> in.hasBOM(ByteOrderMark.UTF_32LE));
515         }
516     }
517 
518     @Test
519     public void testReadWithBOMUtf32Le() throws Exception {
520         assumeTrue(Charset.isSupported("UTF_32LE"));
521         final byte[] data = "ABC".getBytes("UTF_32LE");
522         try (BOMInputStream in = new BOMInputStream(createUtf32LeDataStream(data, true),
523                 ByteOrderMark.UTF_32LE)) {
524             assertEquals('A', in.read());
525             assertEquals(0, in.read());
526             assertEquals(0, in.read());
527             assertEquals(0, in.read());
528             assertEquals('B', in.read());
529             assertEquals(0, in.read());
530             assertEquals(0, in.read());
531             assertEquals(0, in.read());
532             assertEquals('C', in.read());
533             assertEquals(0, in.read());
534             assertEquals(0, in.read());
535             assertEquals(0, in.read());
536             assertEquals(-1, in.read());
537             assertTrue(in.hasBOM(), "hasBOM()");
538             assertTrue(in.hasBOM(ByteOrderMark.UTF_32LE), "hasBOM(UTF-32LE)");
539             assertEquals(ByteOrderMark.UTF_32LE, in.getBOM(), "getBOM");
540             assertThrows(IllegalArgumentException.class, () -> in.hasBOM(ByteOrderMark.UTF_32BE));
541         }
542     }
543 
544     @Test
545     public void testReadWithBOMUtf8() throws Exception {
546         final byte[] data = "ABC".getBytes(StandardCharsets.UTF_8);
547         try (BOMInputStream in = new BOMInputStream(createUtf8Input(data, true), ByteOrderMark.UTF_8)) {
548             assertEquals('A', in.read());
549             assertEquals('B', in.read());
550             assertEquals('C', in.read());
551             assertEquals(-1, in.read());
552             assertTrue(in.hasBOM(), "hasBOM()");
553             assertTrue(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
554             assertEquals(ByteOrderMark.UTF_8, in.getBOM(), "getBOM");
555             assertThrows(IllegalArgumentException.class, () -> in.hasBOM(ByteOrderMark.UTF_16BE));
556         }
557     }
558 
559     @Test
560     public void testReadWithMultipleBOM() throws Exception {
561         final byte[] data = { 'A', 'B', 'C' };
562         try (BOMInputStream in = new BOMInputStream(createUtf8Input(data, true), ByteOrderMark.UTF_16BE,
563                 ByteOrderMark.UTF_8)) {
564             assertEquals('A', in.read());
565             assertEquals('B', in.read());
566             assertEquals('C', in.read());
567             assertEquals(-1, in.read());
568             assertTrue(in.hasBOM(), "hasBOM()");
569             assertTrue(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
570             assertFalse(in.hasBOM(ByteOrderMark.UTF_16BE), "hasBOM(UTF-16BE)");
571             assertEquals(ByteOrderMark.UTF_8, in.getBOM(), "getBOM");
572         }
573     }
574 
575     @Test
576     public void testReadWithoutBOM() throws Exception {
577         final byte[] data = { 'A', 'B', 'C' };
578         try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
579             assertEquals('A', in.read());
580             assertEquals('B', in.read());
581             assertEquals('C', in.read());
582             assertEquals(-1, in.read());
583             assertFalse(in.hasBOM(), "hasBOM()");
584             assertFalse(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
585             assertNull(in.getBOM(), "getBOM");
586         }
587     }
588 
589     @Test
590     public void testReadXmlWithBOMUcs2() throws Exception {
591         assumeFalse(System.getProperty("java.vendor").contains("IBM"), "This test does not pass on some IBM VMs xml parsers");
592 
593         // UCS-2 is BE.
594         assumeTrue(Charset.isSupported("ISO-10646-UCS-2"));
595         final byte[] data = "<?xml version=\"1.0\" encoding=\"ISO-10646-UCS-2\"?><X/>".getBytes("ISO-10646-UCS-2");
596         try (BOMInputStream in = new BOMInputStream(createUtf16BeDataStream(data, true), ByteOrderMark.UTF_16BE)) {
597             parseXml(in);
598         }
599         parseXml(createUtf16BeDataStream(data, true));
600     }
601 
602     @Test
603     public void testReadXmlWithBOMUcs4() throws Exception {
604         // UCS-4 is BE or LE?
605         // Hm: ISO-10646-UCS-4 is not supported on Oracle 1.6.0_31
606         assumeTrue(Charset.isSupported("ISO-10646-UCS-4"));
607         final byte[] data = "<?xml version=\"1.0\" encoding=\"ISO-10646-UCS-4\"?><X/>".getBytes("ISO-10646-UCS-4");
608         // XML parser does not know what to do with UTF-32
609         try (BOMInputStream in = new BOMInputStream(createUtf32BeDataStream(data, true), ByteOrderMark.UTF_32BE)) {
610             parseXml(in);
611             // XML parser does not know what to do with UTF-32
612             assumeTrue(jvmAndSaxBothSupportCharset("UTF_32LE"), "JVM and SAX need to support UTF_32LE for this");
613         }
614         parseXml(createUtf32BeDataStream(data, true));
615     }
616 
617     @Test
618     public void testReadXmlWithBOMUtf16Be() throws Exception {
619         final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-16BE\"?><X/>".getBytes(StandardCharsets.UTF_16BE);
620         try (BOMInputStream in = new BOMInputStream(createUtf16BeDataStream(data, true), ByteOrderMark.UTF_16BE)) {
621             parseXml(in);
622         }
623         parseXml(createUtf16BeDataStream(data, true));
624     }
625 
626     @Test
627     public void testReadXmlWithBOMUtf16Le() throws Exception {
628         final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-16LE\"?><X/>".getBytes(StandardCharsets.UTF_16LE);
629         try (BOMInputStream in = new BOMInputStream(createUtf16LeDataStream(data, true), ByteOrderMark.UTF_16LE)) {
630             parseXml(in);
631         }
632         parseXml(createUtf16LeDataStream(data, true));
633     }
634 
635     @Test
636     public void testReadXmlWithBOMUtf32Be() throws Exception {
637         assumeTrue(jvmAndSaxBothSupportCharset("UTF_32BE"), "JVM and SAX need to support UTF_32BE for this");
638         final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-32BE\"?><X/>".getBytes("UTF_32BE");
639         try (BOMInputStream in = new BOMInputStream(createUtf32BeDataStream(data, true), ByteOrderMark.UTF_32BE)) {
640             parseXml(in);
641         }
642         // XML parser does not know what to do with UTF-32, so we warp the input stream with a XmlStreamReader
643         try (XmlStreamReader in = new XmlStreamReader(createUtf32BeDataStream(data, true))) {
644             parseXml(in);
645         }
646     }
647 
648     @Test
649     public void testReadXmlWithBOMUtf32Le() throws Exception {
650         assumeTrue(jvmAndSaxBothSupportCharset("UTF_32LE"), "JVM and SAX need to support UTF_32LE for this");
651         final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-32LE\"?><X/>".getBytes("UTF_32LE");
652         try (BOMInputStream in = new BOMInputStream(createUtf32LeDataStream(data, true), ByteOrderMark.UTF_32LE)) {
653             parseXml(in);
654         }
655         // XML parser does not know what to do with UTF-32, so we warp the input stream with a XmlStreamReader
656         try (XmlStreamReader in = new XmlStreamReader(createUtf32LeDataStream(data, true))) {
657             parseXml(in);
658         }
659     }
660 
661     @Test
662     public void testReadXmlWithBOMUtf8() throws Exception {
663         final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><X/>".getBytes(StandardCharsets.UTF_8);
664         try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
665             parseXml(in);
666         }
667         parseXml(createUtf8Input(data, true));
668     }
669 
670     @Test
671     public void testReadXmlWithoutBOMUtf32Be() throws Exception {
672         assumeTrue(jvmAndSaxBothSupportCharset("UTF_32BE"), "JVM and SAX need to support UTF_32BE for this");
673         final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF_32BE\"?><X/>".getBytes("UTF_32BE");
674         try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
675             parseXml(in);
676         }
677         parseXml(createUtf32BeDataStream(data, false));
678     }
679 
680     @Test
681     public void testReadXmlWithoutBOMUtf32Le() throws Exception {
682         assumeTrue(jvmAndSaxBothSupportCharset("UTF_32LE"), "JVM and SAX need to support UTF_32LE for this");
683         final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-32LE\"?><X/>".getBytes("UTF_32LE");
684         try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
685             parseXml(in);
686         }
687         parseXml(createUtf32BeDataStream(data, false));
688     }
689 
690 
691     @Test
692     public void testSkipReturnValueWithBom() throws IOException {
693         final byte[] data = { (byte) 0x31, (byte) 0x32, (byte) 0x33 };
694         try (BOMInputStream is1 = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
695             assertEquals(2, is1.skip(2));
696             assertEquals((byte) 0x33, is1.read());
697         }
698     }
699 
700     @Test
701     public void testSkipReturnValueWithoutBom() throws IOException {
702         final byte[] data = { (byte) 0x31, (byte) 0x32, (byte) 0x33 };
703         try (BOMInputStream is2 = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
704             assertEquals(2, is2.skip(2)); // IO-428
705             assertEquals((byte) 0x33, is2.read());
706         }
707     }
708 
709     @Test
710     public void testSkipWithBOM() throws Exception {
711         final byte[] data = { 'A', 'B', 'C', 'D' };
712         try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
713             in.skip(2L);
714             assertEquals('C', in.read());
715         }
716     }
717 
718     @Test
719     public void testSkipWithoutBOM() throws Exception {
720         final byte[] data = { 'A', 'B', 'C', 'D' };
721         try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
722             in.skip(2L);
723             assertEquals('C', in.read());
724         }
725     }
726 
727     @Test
728     public void testSmallBufferWithBOM() throws Exception {
729         final byte[] data = { 'A', 'B', 'C' };
730         try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
731             final byte[] buf = new byte[1024];
732             assertData(new byte[] { 'A', 'B' }, buf, in.read(buf, 0, 2));
733             assertData(new byte[] { 'C' }, buf, in.read(buf, 0, 2));
734         }
735     }
736 
737     @Test
738     public void testSmallBufferWithoutBOM() throws Exception {
739         final byte[] data = { 'A', 'B', 'C' };
740         try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
741             final byte[] buf = new byte[1024];
742             assertData(new byte[] { 'A', 'B' }, buf, in.read(buf, 0, 2));
743             assertData(new byte[] { 'C' }, buf, in.read(buf, 0, 2));
744         }
745     }
746 
747     @Test
748     // make sure that our support code works as expected
749     public void testSupportCode() throws Exception {
750         try (InputStream in = createUtf8Input(new byte[] { 'A', 'B' }, true)) {
751             final byte[] buf = new byte[1024];
752             final int len = in.read(buf);
753             assertEquals(5, len);
754             assertEquals(0xEF, buf[0] & 0xFF);
755             assertEquals(0xBB, buf[1] & 0xFF);
756             assertEquals(0xBF, buf[2] & 0xFF);
757             assertEquals('A', buf[3] & 0xFF);
758             assertEquals('B', buf[4] & 0xFF);
759 
760             assertData(new byte[] { (byte) 0xEF, (byte) 0xBB, (byte) 0xBF, 'A', 'B' }, buf, len);
761         }
762     }
763 }