View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.io.input;
18  
19  import static org.junit.jupiter.api.Assertions.assertEquals;
20  import static org.junit.jupiter.api.Assertions.assertFalse;
21  import static org.junit.jupiter.api.Assertions.assertNotNull;
22  import static org.junit.jupiter.api.Assertions.assertNull;
23  import static org.junit.jupiter.api.Assertions.assertThrows;
24  import static org.junit.jupiter.api.Assertions.assertThrowsExactly;
25  import static org.junit.jupiter.api.Assertions.assertTrue;
26  import static org.junit.jupiter.api.Assumptions.assumeFalse;
27  import static org.junit.jupiter.api.Assumptions.assumeTrue;
28  
29  import java.io.ByteArrayInputStream;
30  import java.io.IOException;
31  import java.io.InputStream;
32  import java.io.Reader;
33  import java.nio.charset.Charset;
34  import java.nio.charset.StandardCharsets;
35  import java.util.concurrent.atomic.AtomicBoolean;
36  
37  import javax.xml.parsers.DocumentBuilder;
38  import javax.xml.parsers.DocumentBuilderFactory;
39  import javax.xml.parsers.ParserConfigurationException;
40  
41  import org.apache.commons.io.ByteOrderMark;
42  import org.apache.commons.io.IOUtils;
43  import org.apache.commons.io.test.CustomIOException;
44  import org.apache.commons.lang3.SystemProperties;
45  import org.junit.jupiter.api.Test;
46  import org.w3c.dom.Document;
47  import org.xml.sax.InputSource;
48  import org.xml.sax.SAXException;
49  import org.xml.sax.SAXParseException;
50  
51  /**
52   * Test case for {@link BOMInputStream}.
53   */
54  @SuppressWarnings("ResultOfMethodCallIgnored")
55  public class BOMInputStreamTest {
56  
57      /**
58       *  A mock InputStream that expects {@code close()} to be called.
59       */
60      private static final class ExpectCloseInputStream extends InputStream {
61          private boolean closed;
62  
63          public void assertCloseCalled() {
64              assertTrue(closed);
65          }
66  
67          @Override
68          public void close() throws IOException {
69              closed = true;
70          }
71  
72          @Override
73          public int read() throws IOException {
74              return -1;
75          }
76      }
77  
78      private void assertData(final byte[] expected, final byte[] actual, final int len) {
79          assertEquals(expected.length, len, "length");
80          for (int ii = 0; ii < expected.length; ii++) {
81              assertEquals(expected[ii], actual[ii], "byte " + ii);
82          }
83      }
84  
85      /**
86       *  Creates the underlying data stream, with or without BOM.
87       */
88      private InputStream createUtf16BeDataStream(final byte[] baseData, final boolean addBOM) {
89          byte[] data = baseData;
90          if (addBOM) {
91              data = new byte[baseData.length + 2];
92              data[0] = (byte) 0xFE;
93              data[1] = (byte) 0xFF;
94              System.arraycopy(baseData, 0, data, 2, baseData.length);
95          }
96          return new ByteArrayInputStream(data);
97      }
98  
99      /**
100      *  Creates the underlying data stream, with or without BOM.
101      */
102     private InputStream createUtf16LeDataStream(final byte[] baseData, final boolean addBOM) {
103         byte[] data = baseData;
104         if (addBOM) {
105             data = new byte[baseData.length + 2];
106             data[0] = (byte) 0xFF;
107             data[1] = (byte) 0xFE;
108             System.arraycopy(baseData, 0, data, 2, baseData.length);
109         }
110         return new ByteArrayInputStream(data);
111     }
112 
113     /**
114      *  Creates the underlying data stream, with or without BOM.
115      */
116     private InputStream createUtf32BeDataStream(final byte[] baseData, final boolean addBOM) {
117         byte[] data = baseData;
118         if (addBOM) {
119             data = new byte[baseData.length + 4];
120             data[0] = 0;
121             data[1] = 0;
122             data[2] = (byte) 0xFE;
123             data[3] = (byte) 0xFF;
124             System.arraycopy(baseData, 0, data, 4, baseData.length);
125         }
126         return new ByteArrayInputStream(data);
127     }
128 
129     /**
130      *  Creates the underlying data stream, with or without BOM.
131      */
132     private InputStream createUtf32LeDataStream(final byte[] baseData, final boolean addBOM) {
133         byte[] data = baseData;
134         if (addBOM) {
135             data = new byte[baseData.length + 4];
136             data[0] = (byte) 0xFF;
137             data[1] = (byte) 0xFE;
138             data[2] = 0;
139             data[3] = 0;
140             System.arraycopy(baseData, 0, data, 4, baseData.length);
141         }
142         return new ByteArrayInputStream(data);
143     }
144 
145     /**
146      *  Creates the underlying data stream, with or without BOM.
147      */
148     private InputStream createUtf8Input(final byte[] baseData, final boolean addBOM) {
149         byte[] data = baseData;
150         if (addBOM) {
151             data = new byte[baseData.length + 3];
152             data[0] = (byte) 0xEF;
153             data[1] = (byte) 0xBB;
154             data[2] = (byte) 0xBF;
155             System.arraycopy(baseData, 0, data, 3, baseData.length);
156         }
157         return new ByteArrayInputStream(data);
158     }
159 
160     private boolean doesSaxSupportCharacterSet(final String charsetName) throws ParserConfigurationException, SAXException, IOException {
161         final DocumentBuilder documentBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
162         try (InputStream byteStream = CharSequenceInputStream.builder().setCharSequence("<?xml version=\"1.0\" encoding=\"" + charsetName + "\"?><Z/>")
163                 .setCharset(charsetName).get()) {
164             final InputSource is = new InputSource(byteStream);
165             is.setEncoding(charsetName);
166             documentBuilder.parse(is);
167         } catch (final SAXParseException e) {
168             if (e.getMessage().contains(charsetName)) {
169                 return false;
170             }
171         }
172         return true;
173     }
174 
175     private boolean jvmAndSaxBothSupportCharset(final String charSetName) throws ParserConfigurationException, SAXException, IOException {
176         return Charset.isSupported(charSetName) &&  doesSaxSupportCharacterSet(charSetName);
177     }
178 
179     private void parseXml(final InputStream in) throws SAXException, IOException, ParserConfigurationException {
180         final Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(new InputSource(in));
181         assertNotNull(doc);
182         assertEquals("X", doc.getFirstChild().getNodeName());
183     }
184 
185     private void parseXml(final Reader in) throws SAXException, IOException, ParserConfigurationException {
186         final Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(new InputSource(in));
187         assertNotNull(doc);
188         assertEquals("X", doc.getFirstChild().getNodeName());
189     }
190 
191     private void readBOMInputStreamTwice(final String resource) throws Exception {
192         try (InputStream inputStream = this.getClass().getResourceAsStream(resource)) {
193             assertNotNull(inputStream);
194             try (BOMInputStream bomInputStream = BOMInputStream.builder().setInputStream(inputStream).get()) {
195                 bomInputStream.mark(1_000_000);
196 
197                 readFile(bomInputStream);
198                 bomInputStream.reset();
199                 readFile(bomInputStream);
200                 inputStream.close();
201             }
202         }
203     }
204 
205     private void readFile(final BOMInputStream bomInputStream) throws Exception {
206         int bytes;
207         final byte[] bytesFromStream = new byte[100];
208         do {
209             bytes = bomInputStream.read(bytesFromStream);
210         } while (bytes > 0);
211     }
212 
213     @Test
214     public void testAfterReadConsumer() throws Exception {
215         final byte[] data = { 'A', 'B', 'C', 'D' };
216         final AtomicBoolean boolRef = new AtomicBoolean();
217         // @formatter:off
218         try (InputStream bounded = BOMInputStream.builder()
219                 .setInputStream(createUtf8Input(data, true))
220                 .setAfterRead(i -> boolRef.set(true))
221                 .get()) {
222             IOUtils.consume(bounded);
223         }
224         // @formatter:on
225         assertTrue(boolRef.get());
226         // Throwing
227         final String message = "test exception message";
228         // @formatter:off
229         try (InputStream bounded = BOMInputStream.builder()
230                 .setInputStream(createUtf8Input(data, true))
231                 .setAfterRead(i -> {
232                     throw new CustomIOException(message);
233                 })
234                 .get()) {
235             assertEquals(message, assertThrowsExactly(CustomIOException.class, () -> IOUtils.consume(bounded)).getMessage());
236         }
237         // @formatter:on
238     }
239 
240     @Test
241     public void testAvailableWithBOMAfterClose() throws Exception {
242         final byte[] data = { 'A', 'B', 'C', 'D' };
243         final InputStream shadow;
244         try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
245             assertEquals(7, in.available());
246             shadow = in;
247         }
248         assertEquals(0, shadow.available());
249     }
250 
251     @Test
252     public void testAvailableWithBOMAfterOpen() throws Exception {
253         final byte[] data = { 'A', 'B', 'C', 'D' };
254         try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
255             assertEquals(7, in.available());
256         }
257     }
258 
259     @Test
260     public void testAvailableWithoutBOM() throws Exception {
261         final byte[] data = { 'A', 'B', 'C', 'D' };
262         try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
263             assertEquals(4, in.available());
264         }
265     }
266 
267     @Test
268     public void testBuilderGet() {
269         // java.lang.IllegalStateException: origin == null
270         assertThrows(IllegalStateException.class, () -> BOMInputStream.builder().get());
271     }
272 
273     @Test
274     // this is here for coverage
275     public void testClose() throws Exception {
276         try (ExpectCloseInputStream del = new ExpectCloseInputStream()) {
277             try (InputStream in = new BOMInputStream(del)) {
278                 // nothing
279             }
280             del.assertCloseCalled();
281         }
282     }
283 
284     @Test
285     public void testCloseHandleIOException() throws IOException {
286         ProxyInputStreamTest.testCloseHandleIOException(BOMInputStream.builder());
287     }
288 
289     @Test
290     public void testEmptyBufferWithBOM() throws Exception {
291         final byte[] data = {};
292         try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
293             final byte[] buf = new byte[1024];
294             assertEquals(-1, in.read(buf));
295         }
296     }
297 
298     @Test
299     public void testEmptyBufferWithoutBOM() throws Exception {
300         final byte[] data = {};
301         try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
302             final byte[] buf = new byte[1024];
303             assertEquals(-1, in.read(buf));
304         }
305     }
306 
307     @Test
308     public void testGetBOMFirstThenRead() throws Exception {
309         final byte[] data = { 'A', 'B', 'C' };
310         try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
311             assertEquals(ByteOrderMark.UTF_8, in.getBOM(), "getBOM");
312             assertTrue(in.hasBOM(), "hasBOM()");
313             assertTrue(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
314             assertEquals('A', in.read());
315             assertEquals('B', in.read());
316             assertEquals('C', in.read());
317             assertEquals(-1, in.read());
318         }
319     }
320 
321     @Test
322     public void testGetBOMFirstThenReadInclude() throws Exception {
323         final byte[] data = { 'A', 'B', 'C' };
324         try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).setInclude(true).get()) {
325             assertTrue(in.hasBOM(), "hasBOM()");
326             assertTrue(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
327             assertEquals(ByteOrderMark.UTF_8, in.getBOM(), "getBOM");
328             assertEquals(0xEF, in.read());
329             assertEquals(0xBB, in.read());
330             assertEquals(0xBF, in.read());
331             assertEquals('A', in.read());
332             assertEquals('B', in.read());
333             assertEquals('C', in.read());
334             assertEquals(-1, in.read());
335         }
336     }
337 
338     @Test
339     public void testLargeBufferWithBOM() throws Exception {
340         final byte[] data = { 'A', 'B', 'C' };
341         try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
342             final byte[] buf = new byte[1024];
343             assertData(data, buf, in.read(buf));
344         }
345     }
346 
347     @Test
348     public void testLargeBufferWithoutBOM() throws Exception {
349         final byte[] data = { 'A', 'B', 'C' };
350         try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
351             final byte[] buf = new byte[1024];
352             assertData(data, buf, in.read(buf));
353         }
354     }
355 
356     @Test
357     public void testLeadingNonBOMBufferedRead() throws Exception {
358         final byte[] data = { (byte) 0xEF, (byte) 0xAB, (byte) 0xCD };
359         try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
360             final byte[] buf = new byte[1024];
361             assertData(data, buf, in.read(buf));
362         }
363     }
364 
365     @Test
366     public void testLeadingNonBOMSingleRead() throws Exception {
367         final byte[] data = { (byte) 0xEF, (byte) 0xAB, (byte) 0xCD };
368         try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
369             assertEquals(0xEF, in.read());
370             assertEquals(0xAB, in.read());
371             assertEquals(0xCD, in.read());
372             assertEquals(-1, in.read());
373         }
374     }
375 
376     @Test
377     public void testMarkResetAfterReadWithBOM() throws Exception {
378         final byte[] data = { 'A', 'B', 'C', 'D' };
379         try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
380             assertTrue(in.markSupported());
381 
382             in.read();
383             in.mark(10);
384 
385             in.read();
386             in.read();
387             in.reset();
388             assertEquals('B', in.read());
389         }
390     }
391 
392     @Test
393     public void testMarkResetAfterReadWithoutBOM() throws Exception {
394         final byte[] data = { 'A', 'B', 'C', 'D' };
395         try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
396             assertTrue(in.markSupported());
397 
398             in.read();
399             in.mark(10);
400 
401             in.read();
402             in.read();
403             in.reset();
404             assertEquals('B', in.read());
405         }
406     }
407 
408     @Test
409     public void testMarkResetBeforeReadWithBOM() throws Exception {
410         final byte[] data = { 'A', 'B', 'C', 'D' };
411         try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
412             assertTrue(in.markSupported());
413 
414             in.mark(10);
415 
416             in.read();
417             in.read();
418             in.reset();
419             assertEquals('A', in.read());
420         }
421     }
422 
423     @Test
424     public void testMarkResetBeforeReadWithoutBOM() throws Exception {
425         final byte[] data = { 'A', 'B', 'C', 'D' };
426         try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
427             assertTrue(in.markSupported());
428 
429             in.mark(10);
430 
431             in.read();
432             in.read();
433             in.reset();
434             assertEquals('A', in.read());
435         }
436     }
437 
438     @Test
439     public void testNoBoms() throws Exception {
440         final byte[] data = { 'A', 'B', 'C' };
441         assertThrows(IllegalArgumentException.class, () -> new BOMInputStream(createUtf8Input(data, true), false, (ByteOrderMark[]) null).close());
442         assertThrows(IllegalArgumentException.class, () -> new BOMInputStream(createUtf8Input(data, true), false, new ByteOrderMark[0]).close());
443         //
444         try (BOMInputStream bomInputStream = BOMInputStream.builder()
445                 .setInputStream(createUtf8Input(data, true))
446                 .setInclude(true)
447                 .setByteOrderMarks((ByteOrderMark[]) null)
448                 .get()) {
449             assertEquals(BOMInputStream.Builder.getDefaultByteOrderMark(), bomInputStream.getBOM());
450         }
451         assertThrows(IllegalArgumentException.class, () -> BOMInputStream.builder()
452                 .setInputStream(createUtf8Input(data, true))
453                 .setInclude(true)
454                 .setByteOrderMarks()
455                 .get()
456                 .close());
457     }
458 
459     @Test
460     public void testReadAfterClose() throws Exception {
461         final byte[] data = { 'A', 'B', 'C', 'D' };
462         try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
463             assertEquals(7, in.available());
464             in.close();
465             assertThrows(IOException.class, in::read);
466         }
467     }
468 
469     @Test
470     public void testReadEmpty() throws Exception {
471         final byte[] data = {};
472         try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
473             assertEquals(-1, in.read());
474             assertFalse(in.hasBOM(), "hasBOM()");
475             assertFalse(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
476             assertNull(in.getBOM(), "getBOM");
477         }
478     }
479 
480     @Test
481     public void testReadSmall() throws Exception {
482         final byte[] data = { 'A', 'B' };
483         try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
484             assertEquals('A', in.read());
485             assertEquals('B', in.read());
486             assertEquals(-1, in.read());
487             assertFalse(in.hasBOM(), "hasBOM()");
488             assertFalse(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
489             assertNull(in.getBOM(), "getBOM");
490         }
491     }
492 
493     @Test
494     public void testReadTwiceWithBOM() throws Exception {
495         readBOMInputStreamTwice("/org/apache/commons/io/testfileBOM.xml");
496     }
497 
498     @Test
499     public void testReadTwiceWithoutBOM() throws Exception {
500         readBOMInputStreamTwice("/org/apache/commons/io/testfileNoBOM.xml");
501     }
502 
503     @Test
504     public void testReadWithBOMInclude() throws Exception {
505         final byte[] data = { 'A', 'B', 'C' };
506         try (BOMInputStream in = new BOMInputStream(createUtf8Input(data, true), true)) {
507             assertEquals(0xEF, in.read());
508             assertEquals(0xBB, in.read());
509             assertEquals(0xBF, in.read());
510             assertEquals('A', in.read());
511             assertEquals('B', in.read());
512             assertEquals('C', in.read());
513             assertEquals(-1, in.read());
514             assertTrue(in.hasBOM(), "hasBOM()");
515             assertTrue(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
516             assertEquals(ByteOrderMark.UTF_8, in.getBOM(), "getBOM");
517         }
518     }
519 
520     @Test
521     public void testReadWithBOMUtf16Be() throws Exception {
522         final byte[] data = "ABC".getBytes(StandardCharsets.UTF_16BE);
523         try (BOMInputStream in = new BOMInputStream(createUtf16BeDataStream(data, true), ByteOrderMark.UTF_16BE)) {
524             assertEquals(0, in.read());
525             assertEquals('A', in.read());
526             assertEquals(0, in.read());
527             assertEquals('B', in.read());
528             assertEquals(0, in.read());
529             assertEquals('C', in.read());
530             assertEquals(-1, in.read());
531             assertTrue(in.hasBOM(), "hasBOM()");
532             assertTrue(in.hasBOM(ByteOrderMark.UTF_16BE), "hasBOM(UTF-16BE)");
533             assertEquals(ByteOrderMark.UTF_16BE, in.getBOM(), "getBOM");
534             assertThrows(IllegalArgumentException.class, () -> in.hasBOM(ByteOrderMark.UTF_16LE));
535         }
536     }
537 
538     @Test
539     public void testReadWithBOMUtf16Le() throws Exception {
540         final byte[] data = "ABC".getBytes(StandardCharsets.UTF_16LE);
541         try (BOMInputStream in = new BOMInputStream(createUtf16LeDataStream(data, true), ByteOrderMark.UTF_16LE)) {
542             assertEquals('A', in.read());
543             assertEquals(0, in.read());
544             assertEquals('B', in.read());
545             assertEquals(0, in.read());
546             assertEquals('C', in.read());
547             assertEquals(0, in.read());
548             assertEquals(-1, in.read());
549             assertTrue(in.hasBOM(), "hasBOM()");
550             assertTrue(in.hasBOM(ByteOrderMark.UTF_16LE), "hasBOM(UTF-16LE)");
551             assertEquals(ByteOrderMark.UTF_16LE, in.getBOM(), "getBOM");
552             assertThrows(IllegalArgumentException.class, () -> in.hasBOM(ByteOrderMark.UTF_16BE));
553         }
554     }
555 
556     @Test
557     public void testReadWithBOMUtf32Be() throws Exception {
558         assumeTrue(Charset.isSupported("UTF_32BE"));
559         final byte[] data = "ABC".getBytes("UTF_32BE");
560         try (BOMInputStream in = new BOMInputStream(createUtf32BeDataStream(data, true),
561                 ByteOrderMark.UTF_32BE)) {
562             assertEquals(0, in.read());
563             assertEquals(0, in.read());
564             assertEquals(0, in.read());
565             assertEquals('A', in.read());
566             assertEquals(0, in.read());
567             assertEquals(0, in.read());
568             assertEquals(0, in.read());
569             assertEquals('B', in.read());
570             assertEquals(0, in.read());
571             assertEquals(0, in.read());
572             assertEquals(0, in.read());
573             assertEquals('C', in.read());
574             assertEquals(-1, in.read());
575             assertTrue(in.hasBOM(), "hasBOM()");
576             assertTrue(in.hasBOM(ByteOrderMark.UTF_32BE), "hasBOM(UTF-32BE)");
577             assertEquals(ByteOrderMark.UTF_32BE, in.getBOM(), "getBOM");
578             assertThrows(IllegalArgumentException.class, () -> in.hasBOM(ByteOrderMark.UTF_32LE));
579         }
580     }
581 
582     @Test
583     public void testReadWithBOMUtf32Le() throws Exception {
584         assumeTrue(Charset.isSupported("UTF_32LE"));
585         final byte[] data = "ABC".getBytes("UTF_32LE");
586         try (BOMInputStream in = new BOMInputStream(createUtf32LeDataStream(data, true),
587                 ByteOrderMark.UTF_32LE)) {
588             assertEquals('A', in.read());
589             assertEquals(0, in.read());
590             assertEquals(0, in.read());
591             assertEquals(0, in.read());
592             assertEquals('B', in.read());
593             assertEquals(0, in.read());
594             assertEquals(0, in.read());
595             assertEquals(0, in.read());
596             assertEquals('C', in.read());
597             assertEquals(0, in.read());
598             assertEquals(0, in.read());
599             assertEquals(0, in.read());
600             assertEquals(-1, in.read());
601             assertTrue(in.hasBOM(), "hasBOM()");
602             assertTrue(in.hasBOM(ByteOrderMark.UTF_32LE), "hasBOM(UTF-32LE)");
603             assertEquals(ByteOrderMark.UTF_32LE, in.getBOM(), "getBOM");
604             assertThrows(IllegalArgumentException.class, () -> in.hasBOM(ByteOrderMark.UTF_32BE));
605         }
606     }
607 
608     @Test
609     public void testReadWithBOMUtf8() throws Exception {
610         final byte[] data = "ABC".getBytes(StandardCharsets.UTF_8);
611         try (BOMInputStream in = new BOMInputStream(createUtf8Input(data, true), ByteOrderMark.UTF_8)) {
612             assertEquals('A', in.read());
613             assertEquals('B', in.read());
614             assertEquals('C', in.read());
615             assertEquals(-1, in.read());
616             assertTrue(in.hasBOM(), "hasBOM()");
617             assertTrue(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
618             assertEquals(ByteOrderMark.UTF_8, in.getBOM(), "getBOM");
619             assertThrows(IllegalArgumentException.class, () -> in.hasBOM(ByteOrderMark.UTF_16BE));
620         }
621     }
622 
623     @Test
624     public void testReadWithMultipleBOM() throws Exception {
625         final byte[] data = { 'A', 'B', 'C' };
626         try (BOMInputStream in = new BOMInputStream(createUtf8Input(data, true), ByteOrderMark.UTF_16BE,
627                 ByteOrderMark.UTF_8)) {
628             assertEquals('A', in.read());
629             assertEquals('B', in.read());
630             assertEquals('C', in.read());
631             assertEquals(-1, in.read());
632             assertTrue(in.hasBOM(), "hasBOM()");
633             assertTrue(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
634             assertFalse(in.hasBOM(ByteOrderMark.UTF_16BE), "hasBOM(UTF-16BE)");
635             assertEquals(ByteOrderMark.UTF_8, in.getBOM(), "getBOM");
636         }
637     }
638 
639     @Test
640     public void testReadWithoutBOM() throws Exception {
641         final byte[] data = { 'A', 'B', 'C' };
642         try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
643             assertEquals('A', in.read());
644             assertEquals('B', in.read());
645             assertEquals('C', in.read());
646             assertEquals(-1, in.read());
647             assertFalse(in.hasBOM(), "hasBOM()");
648             assertFalse(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
649             assertNull(in.getBOM(), "getBOM");
650         }
651     }
652 
653     @Test
654     public void testReadXmlWithBOMUcs2() throws Exception {
655         assumeFalse(SystemProperties.getJavaVendor().contains("IBM"), "This test does not pass on some IBM VMs xml parsers");
656 
657         // UCS-2 is BE.
658         assumeTrue(Charset.isSupported("ISO-10646-UCS-2"));
659         final byte[] data = "<?xml version=\"1.0\" encoding=\"ISO-10646-UCS-2\"?><X/>".getBytes("ISO-10646-UCS-2");
660         try (BOMInputStream in = new BOMInputStream(createUtf16BeDataStream(data, true), ByteOrderMark.UTF_16BE)) {
661             parseXml(in);
662         }
663         parseXml(createUtf16BeDataStream(data, true));
664     }
665 
666     @Test
667     public void testReadXmlWithBOMUcs4() throws Exception {
668         // UCS-4 is BE or LE?
669         // Hm: ISO-10646-UCS-4 is not supported on Oracle 1.6.0_31
670         assumeTrue(Charset.isSupported("ISO-10646-UCS-4"));
671         final byte[] data = "<?xml version=\"1.0\" encoding=\"ISO-10646-UCS-4\"?><X/>".getBytes("ISO-10646-UCS-4");
672         // XML parser does not know what to do with UTF-32
673         try (BOMInputStream in = new BOMInputStream(createUtf32BeDataStream(data, true), ByteOrderMark.UTF_32BE)) {
674             parseXml(in);
675             // XML parser does not know what to do with UTF-32
676             assumeTrue(jvmAndSaxBothSupportCharset("UTF_32LE"), "JVM and SAX need to support UTF_32LE for this");
677         }
678         parseXml(createUtf32BeDataStream(data, true));
679     }
680 
681     @Test
682     public void testReadXmlWithBOMUtf16Be() throws Exception {
683         final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-16BE\"?><X/>".getBytes(StandardCharsets.UTF_16BE);
684         try (BOMInputStream in = new BOMInputStream(createUtf16BeDataStream(data, true), ByteOrderMark.UTF_16BE)) {
685             parseXml(in);
686         }
687         parseXml(createUtf16BeDataStream(data, true));
688     }
689 
690     @Test
691     public void testReadXmlWithBOMUtf16Le() throws Exception {
692         final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-16LE\"?><X/>".getBytes(StandardCharsets.UTF_16LE);
693         try (BOMInputStream in = new BOMInputStream(createUtf16LeDataStream(data, true), ByteOrderMark.UTF_16LE)) {
694             parseXml(in);
695         }
696         parseXml(createUtf16LeDataStream(data, true));
697     }
698 
699     @Test
700     public void testReadXmlWithBOMUtf32Be() throws Exception {
701         assumeTrue(jvmAndSaxBothSupportCharset("UTF_32BE"), "JVM and SAX need to support UTF_32BE for this");
702         final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-32BE\"?><X/>".getBytes("UTF_32BE");
703         try (BOMInputStream in = new BOMInputStream(createUtf32BeDataStream(data, true), ByteOrderMark.UTF_32BE)) {
704             parseXml(in);
705         }
706         // XML parser does not know what to do with UTF-32, so we warp the input stream with a XmlStreamReader
707         try (XmlStreamReader in = new XmlStreamReader(createUtf32BeDataStream(data, true))) {
708             parseXml(in);
709         }
710     }
711 
712     @Test
713     public void testReadXmlWithBOMUtf32Le() throws Exception {
714         assumeTrue(jvmAndSaxBothSupportCharset("UTF_32LE"), "JVM and SAX need to support UTF_32LE for this");
715         final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-32LE\"?><X/>".getBytes("UTF_32LE");
716         try (BOMInputStream in = new BOMInputStream(createUtf32LeDataStream(data, true), ByteOrderMark.UTF_32LE)) {
717             parseXml(in);
718         }
719         // XML parser does not know what to do with UTF-32, so we warp the input stream with a XmlStreamReader
720         try (XmlStreamReader in = new XmlStreamReader(createUtf32LeDataStream(data, true))) {
721             parseXml(in);
722         }
723     }
724 
725     @Test
726     public void testReadXmlWithBOMUtf8() throws Exception {
727         final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><X/>".getBytes(StandardCharsets.UTF_8);
728         try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
729             parseXml(in);
730         }
731         parseXml(createUtf8Input(data, true));
732     }
733 
734     @Test
735     public void testReadXmlWithoutBOMUtf32Be() throws Exception {
736         assumeTrue(jvmAndSaxBothSupportCharset("UTF_32BE"), "JVM and SAX need to support UTF_32BE for this");
737         final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF_32BE\"?><X/>".getBytes("UTF_32BE");
738         try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
739             parseXml(in);
740         }
741         parseXml(createUtf32BeDataStream(data, false));
742     }
743 
744     @Test
745     public void testReadXmlWithoutBOMUtf32Le() throws Exception {
746         assumeTrue(jvmAndSaxBothSupportCharset("UTF_32LE"), "JVM and SAX need to support UTF_32LE for this");
747         final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-32LE\"?><X/>".getBytes("UTF_32LE");
748         try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
749             parseXml(in);
750         }
751         parseXml(createUtf32BeDataStream(data, false));
752     }
753 
754     @Test
755     public void testSkipReturnValueWithBom() throws IOException {
756         final byte[] data = { (byte) 0x31, (byte) 0x32, (byte) 0x33 };
757         try (BOMInputStream is1 = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
758             assertEquals(2, is1.skip(2));
759             assertEquals((byte) 0x33, is1.read());
760         }
761     }
762 
763     @Test
764     public void testSkipReturnValueWithoutBom() throws IOException {
765         final byte[] data = { (byte) 0x31, (byte) 0x32, (byte) 0x33 };
766         try (BOMInputStream is2 = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
767             assertEquals(2, is2.skip(2)); // IO-428
768             assertEquals((byte) 0x33, is2.read());
769         }
770     }
771 
772     @Test
773     public void testSkipWithBOM() throws Exception {
774         final byte[] data = { 'A', 'B', 'C', 'D' };
775         try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
776             in.skip(2L);
777             assertEquals('C', in.read());
778         }
779     }
780 
781     @Test
782     public void testSkipWithoutBOM() throws Exception {
783         final byte[] data = { 'A', 'B', 'C', 'D' };
784         try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
785             in.skip(2L);
786             assertEquals('C', in.read());
787         }
788     }
789 
790     @Test
791     public void testSmallBufferWithBOM() throws Exception {
792         final byte[] data = { 'A', 'B', 'C' };
793         try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
794             final byte[] buf = new byte[1024];
795             assertData(new byte[] { 'A', 'B' }, buf, in.read(buf, 0, 2));
796             assertData(new byte[] { 'C' }, buf, in.read(buf, 0, 2));
797         }
798     }
799 
800     @Test
801     public void testSmallBufferWithoutBOM() throws Exception {
802         final byte[] data = { 'A', 'B', 'C' };
803         try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
804             final byte[] buf = new byte[1024];
805             assertData(new byte[] { 'A', 'B' }, buf, in.read(buf, 0, 2));
806             assertData(new byte[] { 'C' }, buf, in.read(buf, 0, 2));
807         }
808     }
809 
810     @Test
811     // make sure that our support code works as expected
812     public void testSupportCode() throws Exception {
813         try (InputStream in = createUtf8Input(new byte[] { 'A', 'B' }, true)) {
814             final byte[] buf = new byte[1024];
815             final int len = in.read(buf);
816             assertEquals(5, len);
817             assertEquals(0xEF, buf[0] & 0xFF);
818             assertEquals(0xBB, buf[1] & 0xFF);
819             assertEquals(0xBF, buf[2] & 0xFF);
820             assertEquals('A', buf[3] & 0xFF);
821             assertEquals('B', buf[4] & 0xFF);
822 
823             assertData(new byte[] { (byte) 0xEF, (byte) 0xBB, (byte) 0xBF, 'A', 'B' }, buf, len);
824         }
825     }
826 }