View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.io.input;
18  
19  import static org.junit.jupiter.api.Assertions.assertEquals;
20  import static org.junit.jupiter.api.Assertions.assertFalse;
21  import static org.junit.jupiter.api.Assertions.assertNotNull;
22  import static org.junit.jupiter.api.Assertions.assertNull;
23  import static org.junit.jupiter.api.Assertions.assertThrows;
24  import static org.junit.jupiter.api.Assertions.assertThrowsExactly;
25  import static org.junit.jupiter.api.Assertions.assertTrue;
26  import static org.junit.jupiter.api.Assumptions.assumeFalse;
27  import static org.junit.jupiter.api.Assumptions.assumeTrue;
28  
29  import java.io.ByteArrayInputStream;
30  import java.io.IOException;
31  import java.io.InputStream;
32  import java.io.Reader;
33  import java.nio.charset.Charset;
34  import java.nio.charset.StandardCharsets;
35  import java.util.concurrent.atomic.AtomicBoolean;
36  
37  import javax.xml.parsers.DocumentBuilder;
38  import javax.xml.parsers.DocumentBuilderFactory;
39  import javax.xml.parsers.ParserConfigurationException;
40  
41  import org.apache.commons.io.ByteOrderMark;
42  import org.apache.commons.io.IOUtils;
43  import org.apache.commons.io.test.CustomIOException;
44  import org.apache.commons.lang3.SystemProperties;
45  import org.junit.jupiter.api.Test;
46  import org.w3c.dom.Document;
47  import org.xml.sax.InputSource;
48  import org.xml.sax.SAXException;
49  import org.xml.sax.SAXParseException;
50  
51  /**
52   * Test case for {@link BOMInputStream}.
53   */
54  @SuppressWarnings("ResultOfMethodCallIgnored")
55  class BOMInputStreamTest {
56  
57      /**
58       *  A mock InputStream that tracks if {@code close()} is called.
59       */
60      private static final class ExpectCloseInputStream extends InputStream {
61          private boolean closed;
62  
63          public void assertCloseCalled() {
64              assertTrue(closed);
65          }
66  
67          @Override
68          public void close() throws IOException {
69              closed = true;
70          }
71  
72          @Override
73          public int read() throws IOException {
74              return -1;
75          }
76      }
77  
78      private void assertData(final byte[] expected, final byte[] actual, final int len) {
79          assertEquals(expected.length, len, "length");
80          for (int ii = 0; ii < expected.length; ii++) {
81              assertEquals(expected[ii], actual[ii], "byte " + ii);
82          }
83      }
84  
85      /**
86       *  Creates the underlying data stream, with or without BOM.
87       */
88      private InputStream createUtf16BeDataStream(final byte[] baseData, final boolean addBOM) {
89          byte[] data = baseData;
90          if (addBOM) {
91              data = new byte[baseData.length + 2];
92              data[0] = (byte) 0xFE;
93              data[1] = (byte) 0xFF;
94              System.arraycopy(baseData, 0, data, 2, baseData.length);
95          }
96          return new ByteArrayInputStream(data);
97      }
98  
99      /**
100      *  Creates the underlying data stream, with or without BOM.
101      */
102     private InputStream createUtf16LeDataStream(final byte[] baseData, final boolean addBOM) {
103         byte[] data = baseData;
104         if (addBOM) {
105             data = new byte[baseData.length + 2];
106             data[0] = (byte) 0xFF;
107             data[1] = (byte) 0xFE;
108             System.arraycopy(baseData, 0, data, 2, baseData.length);
109         }
110         return new ByteArrayInputStream(data);
111     }
112 
113     /**
114      *  Creates the underlying data stream, with or without BOM.
115      */
116     private InputStream createUtf32BeDataStream(final byte[] baseData, final boolean addBOM) {
117         byte[] data = baseData;
118         if (addBOM) {
119             data = new byte[baseData.length + 4];
120             data[0] = 0;
121             data[1] = 0;
122             data[2] = (byte) 0xFE;
123             data[3] = (byte) 0xFF;
124             System.arraycopy(baseData, 0, data, 4, baseData.length);
125         }
126         return new ByteArrayInputStream(data);
127     }
128 
129     /**
130      *  Creates the underlying data stream, with or without BOM.
131      */
132     private InputStream createUtf32LeDataStream(final byte[] baseData, final boolean addBOM) {
133         byte[] data = baseData;
134         if (addBOM) {
135             data = new byte[baseData.length + 4];
136             data[0] = (byte) 0xFF;
137             data[1] = (byte) 0xFE;
138             data[2] = 0;
139             data[3] = 0;
140             System.arraycopy(baseData, 0, data, 4, baseData.length);
141         }
142         return new ByteArrayInputStream(data);
143     }
144 
145     /**
146      *  Creates the underlying data stream, with or without BOM.
147      */
148     private InputStream createUtf8Input(final byte[] baseData, final boolean addBOM) {
149         byte[] data = baseData;
150         if (addBOM) {
151             data = new byte[baseData.length + 3];
152             data[0] = (byte) 0xEF;
153             data[1] = (byte) 0xBB;
154             data[2] = (byte) 0xBF;
155             System.arraycopy(baseData, 0, data, 3, baseData.length);
156         }
157         return new ByteArrayInputStream(data);
158     }
159 
160     private boolean doesSaxSupportCharacterSet(final String charsetName) throws ParserConfigurationException, SAXException, IOException {
161         final DocumentBuilder documentBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
162         try (InputStream byteStream = CharSequenceInputStream.builder().setCharSequence("<?xml version=\"1.0\" encoding=\"" + charsetName + "\"?><Z/>")
163                 .setCharset(charsetName).get()) {
164             final InputSource is = new InputSource(byteStream);
165             is.setEncoding(charsetName);
166             documentBuilder.parse(is);
167         } catch (final SAXParseException e) {
168             if (e.getMessage().contains(charsetName)) {
169                 return false;
170             }
171         }
172         return true;
173     }
174 
175     private boolean jvmAndSaxBothSupportCharset(final String charSetName) throws ParserConfigurationException, SAXException, IOException {
176         return Charset.isSupported(charSetName) &&  doesSaxSupportCharacterSet(charSetName);
177     }
178 
179     private void parseXml(final InputStream in) throws SAXException, IOException, ParserConfigurationException {
180         final Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(new InputSource(in));
181         assertNotNull(doc);
182         assertEquals("X", doc.getFirstChild().getNodeName());
183     }
184 
185     private void parseXml(final Reader in) throws SAXException, IOException, ParserConfigurationException {
186         final Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(new InputSource(in));
187         assertNotNull(doc);
188         assertEquals("X", doc.getFirstChild().getNodeName());
189     }
190 
191     private void readBOMInputStreamTwice(final String resource) throws Exception {
192         try (InputStream inputStream = this.getClass().getResourceAsStream(resource)) {
193             assertNotNull(inputStream);
194             try (BOMInputStream bomInputStream = BOMInputStream.builder().setInputStream(inputStream).get()) {
195                 bomInputStream.mark(1_000_000);
196                 readFile(bomInputStream);
197                 bomInputStream.reset();
198                 readFile(bomInputStream);
199                 inputStream.close();
200             }
201         }
202     }
203 
204     private void readFile(final BOMInputStream bomInputStream) throws Exception {
205         int bytes;
206         final byte[] bytesFromStream = new byte[100];
207         do {
208             bytes = bomInputStream.read(bytesFromStream);
209         } while (bytes > 0);
210     }
211 
212     @Test
213     void testAfterReadConsumer() throws Exception {
214         final byte[] data = { 'A', 'B', 'C', 'D' };
215         final AtomicBoolean boolRef = new AtomicBoolean();
216         // @formatter:off
217         try (BOMInputStream bounded = BOMInputStream.builder()
218                 .setInputStream(createUtf8Input(data, true))
219                 .setAfterRead(i -> boolRef.set(true))
220                 .get()) {
221             IOUtils.consume(bounded);
222         }
223         // @formatter:on
224         assertTrue(boolRef.get());
225         // Throwing
226         final String message = "test exception message";
227         // @formatter:off
228         assertEquals(message, assertThrowsExactly(CustomIOException.class, () -> BOMInputStream.builder()
229                 .setInputStream(createUtf8Input(data, true))
230                 .setAfterRead(i -> {
231                     throw new CustomIOException(message);
232                 })
233                 .get()).getMessage());
234         // @formatter:on
235     }
236 
237     @Test
238     void testAvailableWithBOMAfterClose() throws Exception {
239         final byte[] data = { 'A', 'B', 'C', 'D' };
240         final InputStream shadow;
241         try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
242             assertEquals(4, in.available());
243             shadow = in;
244         }
245         assertEquals(0, shadow.available());
246     }
247 
248     @Test
249     void testAvailableWithBOMAfterOpen() throws Exception {
250         final byte[] data = { 'A', 'B', 'C', 'D' };
251         try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
252             assertEquals(4, in.available());
253         }
254     }
255 
256     @Test
257     void testAvailableWithoutBOM() throws Exception {
258         final byte[] data = { 'A', 'B', 'C', 'D' };
259         try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
260             assertEquals(1, in.available());
261         }
262     }
263 
264     @Test
265     void testBuilderGet() {
266         // java.lang.IllegalStateException: origin == null
267         assertThrows(IllegalStateException.class, () -> BOMInputStream.builder().get());
268     }
269 
270     @Test
271     // this is here for coverage
272     void testClose() throws Exception {
273         try (ExpectCloseInputStream del = new ExpectCloseInputStream()) {
274             try (BOMInputStream in = new BOMInputStream(del)) {
275                 // nothing
276             }
277             del.assertCloseCalled();
278         }
279     }
280 
281     @Test
282     void testCloseHandleIOException() throws IOException {
283         final IOException exception = new IOException();
284         ProxyInputStreamTest.testCloseHandleIOException(BOMInputStream.builder().setInputStream(new BrokenInputStream(() -> exception) {
285 
286             @Override
287             public int read() throws IOException {
288                 return 'X';
289             }
290         }).get());
291     }
292 
293     @Test
294     void testEmptyBufferWithBOM() throws Exception {
295         final byte[] data = {};
296         try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
297             final byte[] buf = new byte[1024];
298             assertEquals(-1, in.read(buf));
299         }
300     }
301 
302     @Test
303     void testEmptyBufferWithoutBOM() throws Exception {
304         final byte[] data = {};
305         try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
306             final byte[] buf = new byte[1024];
307             assertEquals(-1, in.read(buf));
308         }
309     }
310 
311     @Test
312     void testGetBOMFirstThenRead() throws Exception {
313         final byte[] data = { 'A', 'B', 'C' };
314         try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
315             assertEquals(ByteOrderMark.UTF_8, in.getBOM(), "getBOM");
316             assertTrue(in.hasBOM(), "hasBOM()");
317             assertTrue(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
318             assertEquals('A', in.read());
319             assertEquals('B', in.read());
320             assertEquals('C', in.read());
321             assertEquals(-1, in.read());
322         }
323     }
324 
325     @Test
326     void testGetBOMFirstThenReadInclude() throws Exception {
327         final byte[] data = { 'A', 'B', 'C' };
328         try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).setInclude(true).get()) {
329             assertTrue(in.hasBOM(), "hasBOM()");
330             assertTrue(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
331             assertEquals(ByteOrderMark.UTF_8, in.getBOM(), "getBOM");
332             assertEquals(0xEF, in.read());
333             assertEquals(0xBB, in.read());
334             assertEquals(0xBF, in.read());
335             assertEquals('A', in.read());
336             assertEquals('B', in.read());
337             assertEquals('C', in.read());
338             assertEquals(-1, in.read());
339         }
340     }
341 
342     @Test
343     void testLargeBufferWithBOM() throws Exception {
344         final byte[] data = { 'A', 'B', 'C' };
345         try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
346             final byte[] buf = new byte[1024];
347             assertData(data, buf, in.read(buf));
348         }
349     }
350 
351     @Test
352     void testLargeBufferWithoutBOM() throws Exception {
353         final byte[] data = { 'A', 'B', 'C' };
354         try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
355             final byte[] buf = new byte[1024];
356             assertData(data, buf, in.read(buf));
357         }
358     }
359 
360     @Test
361     void testLeadingNonBOMBufferedRead() throws Exception {
362         final byte[] data = { (byte) 0xEF, (byte) 0xAB, (byte) 0xCD };
363         try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
364             final byte[] buf = new byte[1024];
365             assertData(data, buf, in.read(buf));
366         }
367     }
368 
369     @Test
370     void testLeadingNonBOMSingleRead() throws Exception {
371         final byte[] data = { (byte) 0xEF, (byte) 0xAB, (byte) 0xCD };
372         try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
373             assertEquals(0xEF, in.read());
374             assertEquals(0xAB, in.read());
375             assertEquals(0xCD, in.read());
376             assertEquals(-1, in.read());
377         }
378     }
379 
380     @Test
381     void testMarkResetAfterReadWithBOM() throws Exception {
382         final byte[] data = { 'A', 'B', 'C', 'D' };
383         try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
384             assertTrue(in.markSupported());
385             in.read();
386             in.mark(10);
387             in.read();
388             in.read();
389             in.reset();
390             assertEquals('B', in.read());
391         }
392     }
393 
394     @Test
395     void testMarkResetAfterReadWithoutBOM() throws Exception {
396         final byte[] data = { 'A', 'B', 'C', 'D' };
397         try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
398             assertTrue(in.markSupported());
399             in.read();
400             in.mark(10);
401             in.read();
402             in.read();
403             in.reset();
404             assertEquals('B', in.read());
405         }
406     }
407 
408     @Test
409     void testMarkResetBeforeReadWithBOM() throws Exception {
410         final byte[] data = { 'A', 'B', 'C', 'D' };
411         try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
412             assertTrue(in.markSupported());
413             in.mark(10);
414             in.read();
415             in.read();
416             in.reset();
417             assertEquals('A', in.read());
418         }
419     }
420 
421     @Test
422     void testMarkResetBeforeReadWithoutBOM() throws Exception {
423         final byte[] data = { 'A', 'B', 'C', 'D' };
424         try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
425             assertTrue(in.markSupported());
426             in.mark(10);
427             in.read();
428             in.read();
429             in.reset();
430             assertEquals('A', in.read());
431         }
432     }
433 
434     @Test
435     void testNoBoms() throws Exception {
436         final byte[] data = { 'A', 'B', 'C' };
437         assertThrows(IllegalArgumentException.class, () -> new BOMInputStream(createUtf8Input(data, true), false, (ByteOrderMark[]) null).close());
438         assertThrows(IllegalArgumentException.class, () -> new BOMInputStream(createUtf8Input(data, true), false, new ByteOrderMark[0]).close());
439         //
440         try (BOMInputStream bomInputStream = BOMInputStream.builder()
441                 .setInputStream(createUtf8Input(data, true))
442                 .setInclude(true)
443                 .setByteOrderMarks((ByteOrderMark[]) null)
444                 .get()) {
445             assertEquals(BOMInputStream.Builder.getDefaultByteOrderMark(), bomInputStream.getBOM());
446         }
447         assertThrows(IllegalArgumentException.class, () -> BOMInputStream.builder()
448                 .setInputStream(createUtf8Input(data, true))
449                 .setInclude(true)
450                 .setByteOrderMarks()
451                 .get()
452                 .close());
453     }
454 
455     @Test
456     void testReadAfterClose() throws Exception {
457         final byte[] data = { 'A', 'B', 'C', 'D' };
458         try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
459             assertEquals(4, in.available());
460             in.close();
461             assertThrows(IOException.class, in::read);
462         }
463     }
464 
465     @Test
466     void testReadEmpty() throws Exception {
467         final byte[] data = {};
468         try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
469             assertEquals(-1, in.read());
470             assertFalse(in.hasBOM(), "hasBOM()");
471             assertFalse(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
472             assertNull(in.getBOM(), "getBOM");
473         }
474     }
475 
476     @Test
477     void testReadSmall() throws Exception {
478         final byte[] data = { 'A', 'B' };
479         try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
480             assertEquals('A', in.read());
481             assertEquals('B', in.read());
482             assertEquals(-1, in.read());
483             assertFalse(in.hasBOM(), "hasBOM()");
484             assertFalse(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
485             assertNull(in.getBOM(), "getBOM");
486         }
487     }
488 
489     @Test
490     void testReadTwiceWithBOM() throws Exception {
491         readBOMInputStreamTwice("/org/apache/commons/io/testfileBOM.xml");
492     }
493 
494     @Test
495     void testReadTwiceWithoutBOM() throws Exception {
496         readBOMInputStreamTwice("/org/apache/commons/io/testfileNoBOM.xml");
497     }
498 
499     @Test
500     void testReadWithBOMInclude() throws Exception {
501         final byte[] data = { 'A', 'B', 'C' };
502         try (BOMInputStream in = new BOMInputStream(createUtf8Input(data, true), true)) {
503             assertEquals(0xEF, in.read());
504             assertEquals(0xBB, in.read());
505             assertEquals(0xBF, in.read());
506             assertEquals('A', in.read());
507             assertEquals('B', in.read());
508             assertEquals('C', in.read());
509             assertEquals(-1, in.read());
510             assertTrue(in.hasBOM(), "hasBOM()");
511             assertTrue(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
512             assertEquals(ByteOrderMark.UTF_8, in.getBOM(), "getBOM");
513         }
514     }
515 
516     @Test
517     void testReadWithBOMUtf16Be() throws Exception {
518         final byte[] data = "ABC".getBytes(StandardCharsets.UTF_16BE);
519         try (BOMInputStream in = new BOMInputStream(createUtf16BeDataStream(data, true), ByteOrderMark.UTF_16BE)) {
520             assertEquals(0, in.read());
521             assertEquals('A', in.read());
522             assertEquals(0, in.read());
523             assertEquals('B', in.read());
524             assertEquals(0, in.read());
525             assertEquals('C', in.read());
526             assertEquals(-1, in.read());
527             assertTrue(in.hasBOM(), "hasBOM()");
528             assertTrue(in.hasBOM(ByteOrderMark.UTF_16BE), "hasBOM(UTF-16BE)");
529             assertEquals(ByteOrderMark.UTF_16BE, in.getBOM(), "getBOM");
530             assertThrows(IllegalArgumentException.class, () -> in.hasBOM(ByteOrderMark.UTF_16LE));
531         }
532     }
533 
534     @Test
535     void testReadWithBOMUtf16Le() throws Exception {
536         final byte[] data = "ABC".getBytes(StandardCharsets.UTF_16LE);
537         try (BOMInputStream in = new BOMInputStream(createUtf16LeDataStream(data, true), ByteOrderMark.UTF_16LE)) {
538             assertEquals('A', in.read());
539             assertEquals(0, in.read());
540             assertEquals('B', in.read());
541             assertEquals(0, in.read());
542             assertEquals('C', in.read());
543             assertEquals(0, in.read());
544             assertEquals(-1, in.read());
545             assertTrue(in.hasBOM(), "hasBOM()");
546             assertTrue(in.hasBOM(ByteOrderMark.UTF_16LE), "hasBOM(UTF-16LE)");
547             assertEquals(ByteOrderMark.UTF_16LE, in.getBOM(), "getBOM");
548             assertThrows(IllegalArgumentException.class, () -> in.hasBOM(ByteOrderMark.UTF_16BE));
549         }
550     }
551 
552     @Test
553     void testReadWithBOMUtf32Be() throws Exception {
554         assumeTrue(Charset.isSupported("UTF_32BE"));
555         final byte[] data = "ABC".getBytes("UTF_32BE");
556         try (BOMInputStream in = new BOMInputStream(createUtf32BeDataStream(data, true),
557                 ByteOrderMark.UTF_32BE)) {
558             assertEquals(0, in.read());
559             assertEquals(0, in.read());
560             assertEquals(0, in.read());
561             assertEquals('A', in.read());
562             assertEquals(0, in.read());
563             assertEquals(0, in.read());
564             assertEquals(0, in.read());
565             assertEquals('B', in.read());
566             assertEquals(0, in.read());
567             assertEquals(0, in.read());
568             assertEquals(0, in.read());
569             assertEquals('C', in.read());
570             assertEquals(-1, in.read());
571             assertTrue(in.hasBOM(), "hasBOM()");
572             assertTrue(in.hasBOM(ByteOrderMark.UTF_32BE), "hasBOM(UTF-32BE)");
573             assertEquals(ByteOrderMark.UTF_32BE, in.getBOM(), "getBOM");
574             assertThrows(IllegalArgumentException.class, () -> in.hasBOM(ByteOrderMark.UTF_32LE));
575         }
576     }
577 
578     @Test
579     void testReadWithBOMUtf32Le() throws Exception {
580         assumeTrue(Charset.isSupported("UTF_32LE"));
581         final byte[] data = "ABC".getBytes("UTF_32LE");
582         try (BOMInputStream in = new BOMInputStream(createUtf32LeDataStream(data, true),
583                 ByteOrderMark.UTF_32LE)) {
584             assertEquals('A', in.read());
585             assertEquals(0, in.read());
586             assertEquals(0, in.read());
587             assertEquals(0, in.read());
588             assertEquals('B', in.read());
589             assertEquals(0, in.read());
590             assertEquals(0, in.read());
591             assertEquals(0, in.read());
592             assertEquals('C', in.read());
593             assertEquals(0, in.read());
594             assertEquals(0, in.read());
595             assertEquals(0, in.read());
596             assertEquals(-1, in.read());
597             assertTrue(in.hasBOM(), "hasBOM()");
598             assertTrue(in.hasBOM(ByteOrderMark.UTF_32LE), "hasBOM(UTF-32LE)");
599             assertEquals(ByteOrderMark.UTF_32LE, in.getBOM(), "getBOM");
600             assertThrows(IllegalArgumentException.class, () -> in.hasBOM(ByteOrderMark.UTF_32BE));
601         }
602     }
603 
604     @Test
605     void testReadWithBOMUtf8() throws Exception {
606         final byte[] data = "ABC".getBytes(StandardCharsets.UTF_8);
607         try (BOMInputStream in = new BOMInputStream(createUtf8Input(data, true), ByteOrderMark.UTF_8)) {
608             assertEquals('A', in.read());
609             assertEquals('B', in.read());
610             assertEquals('C', in.read());
611             assertEquals(-1, in.read());
612             assertTrue(in.hasBOM(), "hasBOM()");
613             assertTrue(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
614             assertEquals(ByteOrderMark.UTF_8, in.getBOM(), "getBOM");
615             assertThrows(IllegalArgumentException.class, () -> in.hasBOM(ByteOrderMark.UTF_16BE));
616         }
617     }
618 
619     @Test
620     void testReadWithMultipleBOM() throws Exception {
621         final byte[] data = { 'A', 'B', 'C' };
622         try (BOMInputStream in = new BOMInputStream(createUtf8Input(data, true), ByteOrderMark.UTF_16BE,
623                 ByteOrderMark.UTF_8)) {
624             assertEquals('A', in.read());
625             assertEquals('B', in.read());
626             assertEquals('C', in.read());
627             assertEquals(-1, in.read());
628             assertTrue(in.hasBOM(), "hasBOM()");
629             assertTrue(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
630             assertFalse(in.hasBOM(ByteOrderMark.UTF_16BE), "hasBOM(UTF-16BE)");
631             assertEquals(ByteOrderMark.UTF_8, in.getBOM(), "getBOM");
632         }
633     }
634 
635     @Test
636     void testReadWithoutBOM() throws Exception {
637         final byte[] data = { 'A', 'B', 'C' };
638         try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
639             assertEquals('A', in.read());
640             assertEquals('B', in.read());
641             assertEquals('C', in.read());
642             assertEquals(-1, in.read());
643             assertFalse(in.hasBOM(), "hasBOM()");
644             assertFalse(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
645             assertNull(in.getBOM(), "getBOM");
646         }
647     }
648 
649     @Test
650     void testReadXmlWithBOMUcs2() throws Exception {
651         assumeFalse(SystemProperties.getJavaVendor().contains("IBM"), "This test does not pass on some IBM VMs XML parsers");
652 
653         // UCS-2 is BE.
654         assumeTrue(Charset.isSupported("ISO-10646-UCS-2"));
655         final byte[] data = "<?xml version=\"1.0\" encoding=\"ISO-10646-UCS-2\"?><X/>".getBytes("ISO-10646-UCS-2");
656         try (BOMInputStream in = new BOMInputStream(createUtf16BeDataStream(data, true), ByteOrderMark.UTF_16BE)) {
657             parseXml(in);
658         }
659         parseXml(createUtf16BeDataStream(data, true));
660     }
661 
662     @Test
663     void testReadXmlWithBOMUcs4() throws Exception {
664         // UCS-4 is BE or LE?
665         // Hm: ISO-10646-UCS-4 is not supported on Oracle 1.6.0_31
666         assumeTrue(Charset.isSupported("ISO-10646-UCS-4"));
667         final byte[] data = "<?xml version=\"1.0\" encoding=\"ISO-10646-UCS-4\"?><X/>".getBytes("ISO-10646-UCS-4");
668         // XML parser does not know what to do with UTF-32
669         try (BOMInputStream in = new BOMInputStream(createUtf32BeDataStream(data, true), ByteOrderMark.UTF_32BE)) {
670             parseXml(in);
671             // XML parser does not know what to do with UTF-32
672             assumeTrue(jvmAndSaxBothSupportCharset("UTF_32LE"), "JVM and SAX need to support UTF_32LE for this");
673         }
674         parseXml(createUtf32BeDataStream(data, true));
675     }
676 
677     @Test
678     void testReadXmlWithBOMUtf16Be() throws Exception {
679         final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-16BE\"?><X/>".getBytes(StandardCharsets.UTF_16BE);
680         try (BOMInputStream in = new BOMInputStream(createUtf16BeDataStream(data, true), ByteOrderMark.UTF_16BE)) {
681             parseXml(in);
682         }
683         parseXml(createUtf16BeDataStream(data, true));
684     }
685 
686     @Test
687     void testReadXmlWithBOMUtf16Le() throws Exception {
688         final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-16LE\"?><X/>".getBytes(StandardCharsets.UTF_16LE);
689         try (BOMInputStream in = new BOMInputStream(createUtf16LeDataStream(data, true), ByteOrderMark.UTF_16LE)) {
690             parseXml(in);
691         }
692         parseXml(createUtf16LeDataStream(data, true));
693     }
694 
695     @Test
696     void testReadXmlWithBOMUtf32Be() throws Exception {
697         assumeTrue(jvmAndSaxBothSupportCharset("UTF_32BE"), "JVM and SAX need to support UTF_32BE for this");
698         final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-32BE\"?><X/>".getBytes("UTF_32BE");
699         try (BOMInputStream in = new BOMInputStream(createUtf32BeDataStream(data, true), ByteOrderMark.UTF_32BE)) {
700             parseXml(in);
701         }
702         // XML parser does not know what to do with UTF-32, so we warp the input stream with a XmlStreamReader
703         try (XmlStreamReader in = new XmlStreamReader(createUtf32BeDataStream(data, true))) {
704             parseXml(in);
705         }
706     }
707 
708     @Test
709     void testReadXmlWithBOMUtf32Le() throws Exception {
710         assumeTrue(jvmAndSaxBothSupportCharset("UTF_32LE"), "JVM and SAX need to support UTF_32LE for this");
711         final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-32LE\"?><X/>".getBytes("UTF_32LE");
712         try (BOMInputStream in = new BOMInputStream(createUtf32LeDataStream(data, true), ByteOrderMark.UTF_32LE)) {
713             parseXml(in);
714         }
715         // XML parser does not know what to do with UTF-32, so we warp the input stream with a XmlStreamReader
716         try (XmlStreamReader in = new XmlStreamReader(createUtf32LeDataStream(data, true))) {
717             parseXml(in);
718         }
719     }
720 
721     @Test
722     void testReadXmlWithBOMUtf8() throws Exception {
723         final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><X/>".getBytes(StandardCharsets.UTF_8);
724         try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
725             parseXml(in);
726         }
727         parseXml(createUtf8Input(data, true));
728     }
729 
730     @Test
731     void testReadXmlWithoutBOMUtf32Be() throws Exception {
732         assumeTrue(jvmAndSaxBothSupportCharset("UTF_32BE"), "JVM and SAX need to support UTF_32BE for this");
733         final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF_32BE\"?><X/>".getBytes("UTF_32BE");
734         try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
735             parseXml(in);
736         }
737         parseXml(createUtf32BeDataStream(data, false));
738     }
739 
740     @Test
741     void testReadXmlWithoutBOMUtf32Le() throws Exception {
742         assumeTrue(jvmAndSaxBothSupportCharset("UTF_32LE"), "JVM and SAX need to support UTF_32LE for this");
743         final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-32LE\"?><X/>".getBytes("UTF_32LE");
744         try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
745             parseXml(in);
746         }
747         parseXml(createUtf32BeDataStream(data, false));
748     }
749 
750     @Test
751     void testSkipReturnValueWithBom() throws IOException {
752         final byte[] data = { (byte) 0x31, (byte) 0x32, (byte) 0x33 };
753         try (BOMInputStream is1 = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
754             assertEquals(2, is1.skip(2));
755             assertEquals((byte) 0x33, is1.read());
756         }
757     }
758 
759     @Test
760     void testSkipReturnValueWithoutBom() throws IOException {
761         final byte[] data = { (byte) 0x31, (byte) 0x32, (byte) 0x33 };
762         try (BOMInputStream is2 = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
763             assertEquals(2, is2.skip(2)); // IO-428
764             assertEquals((byte) 0x33, is2.read());
765         }
766     }
767 
768     @Test
769     void testSkipWithBOM() throws Exception {
770         final byte[] data = { 'A', 'B', 'C', 'D' };
771         try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
772             in.skip(2L);
773             assertEquals('C', in.read());
774         }
775     }
776 
777     @Test
778     void testSkipWithoutBOM() throws Exception {
779         final byte[] data = { 'A', 'B', 'C', 'D' };
780         try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
781             in.skip(2L);
782             assertEquals('C', in.read());
783         }
784     }
785 
786     @Test
787     void testSmallBufferWithBOM() throws Exception {
788         final byte[] data = { 'A', 'B', 'C' };
789         try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
790             final byte[] buf = new byte[1024];
791             assertData(new byte[] { 'A', 'B' }, buf, in.read(buf, 0, 2));
792             assertData(new byte[] { 'C' }, buf, in.read(buf, 0, 2));
793         }
794     }
795 
796     @Test
797     void testSmallBufferWithoutBOM() throws Exception {
798         final byte[] data = { 'A', 'B', 'C' };
799         try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
800             final byte[] buf = new byte[1024];
801             assertData(new byte[] { 'A', 'B' }, buf, in.read(buf, 0, 2));
802             assertData(new byte[] { 'C' }, buf, in.read(buf, 0, 2));
803         }
804     }
805 
806     @Test
807     // make sure that our support code works as expected
808     void testSupportCode() throws Exception {
809         try (InputStream in = createUtf8Input(new byte[] { 'A', 'B' }, true)) {
810             final byte[] buf = new byte[1024];
811             final int len = in.read(buf);
812             assertEquals(5, len);
813             assertEquals(0xEF, buf[0] & 0xFF);
814             assertEquals(0xBB, buf[1] & 0xFF);
815             assertEquals(0xBF, buf[2] & 0xFF);
816             assertEquals('A', buf[3] & 0xFF);
817             assertEquals('B', buf[4] & 0xFF);
818 
819             assertData(new byte[] { (byte) 0xEF, (byte) 0xBB, (byte) 0xBF, 'A', 'B' }, buf, len);
820         }
821     }
822 }