1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.commons.io.input;
18
19 import static org.junit.jupiter.api.Assertions.assertEquals;
20 import static org.junit.jupiter.api.Assertions.assertFalse;
21 import static org.junit.jupiter.api.Assertions.assertNotNull;
22 import static org.junit.jupiter.api.Assertions.assertNull;
23 import static org.junit.jupiter.api.Assertions.assertThrows;
24 import static org.junit.jupiter.api.Assertions.assertThrowsExactly;
25 import static org.junit.jupiter.api.Assertions.assertTrue;
26 import static org.junit.jupiter.api.Assumptions.assumeFalse;
27 import static org.junit.jupiter.api.Assumptions.assumeTrue;
28
29 import java.io.ByteArrayInputStream;
30 import java.io.IOException;
31 import java.io.InputStream;
32 import java.io.Reader;
33 import java.nio.charset.Charset;
34 import java.nio.charset.StandardCharsets;
35 import java.util.concurrent.atomic.AtomicBoolean;
36
37 import javax.xml.parsers.DocumentBuilder;
38 import javax.xml.parsers.DocumentBuilderFactory;
39 import javax.xml.parsers.ParserConfigurationException;
40
41 import org.apache.commons.io.ByteOrderMark;
42 import org.apache.commons.io.IOUtils;
43 import org.apache.commons.io.test.CustomIOException;
44 import org.apache.commons.lang3.SystemProperties;
45 import org.junit.jupiter.api.Test;
46 import org.w3c.dom.Document;
47 import org.xml.sax.InputSource;
48 import org.xml.sax.SAXException;
49 import org.xml.sax.SAXParseException;
50
51
52
53
54 @SuppressWarnings("ResultOfMethodCallIgnored")
55 public class BOMInputStreamTest {
56
57
58
59
60 private static final class ExpectCloseInputStream extends InputStream {
61 private boolean closed;
62
63 public void assertCloseCalled() {
64 assertTrue(closed);
65 }
66
67 @Override
68 public void close() throws IOException {
69 closed = true;
70 }
71
72 @Override
73 public int read() throws IOException {
74 return -1;
75 }
76 }
77
78 private void assertData(final byte[] expected, final byte[] actual, final int len) {
79 assertEquals(expected.length, len, "length");
80 for (int ii = 0; ii < expected.length; ii++) {
81 assertEquals(expected[ii], actual[ii], "byte " + ii);
82 }
83 }
84
85
86
87
88 private InputStream createUtf16BeDataStream(final byte[] baseData, final boolean addBOM) {
89 byte[] data = baseData;
90 if (addBOM) {
91 data = new byte[baseData.length + 2];
92 data[0] = (byte) 0xFE;
93 data[1] = (byte) 0xFF;
94 System.arraycopy(baseData, 0, data, 2, baseData.length);
95 }
96 return new ByteArrayInputStream(data);
97 }
98
99
100
101
102 private InputStream createUtf16LeDataStream(final byte[] baseData, final boolean addBOM) {
103 byte[] data = baseData;
104 if (addBOM) {
105 data = new byte[baseData.length + 2];
106 data[0] = (byte) 0xFF;
107 data[1] = (byte) 0xFE;
108 System.arraycopy(baseData, 0, data, 2, baseData.length);
109 }
110 return new ByteArrayInputStream(data);
111 }
112
113
114
115
116 private InputStream createUtf32BeDataStream(final byte[] baseData, final boolean addBOM) {
117 byte[] data = baseData;
118 if (addBOM) {
119 data = new byte[baseData.length + 4];
120 data[0] = 0;
121 data[1] = 0;
122 data[2] = (byte) 0xFE;
123 data[3] = (byte) 0xFF;
124 System.arraycopy(baseData, 0, data, 4, baseData.length);
125 }
126 return new ByteArrayInputStream(data);
127 }
128
129
130
131
132 private InputStream createUtf32LeDataStream(final byte[] baseData, final boolean addBOM) {
133 byte[] data = baseData;
134 if (addBOM) {
135 data = new byte[baseData.length + 4];
136 data[0] = (byte) 0xFF;
137 data[1] = (byte) 0xFE;
138 data[2] = 0;
139 data[3] = 0;
140 System.arraycopy(baseData, 0, data, 4, baseData.length);
141 }
142 return new ByteArrayInputStream(data);
143 }
144
145
146
147
148 private InputStream createUtf8Input(final byte[] baseData, final boolean addBOM) {
149 byte[] data = baseData;
150 if (addBOM) {
151 data = new byte[baseData.length + 3];
152 data[0] = (byte) 0xEF;
153 data[1] = (byte) 0xBB;
154 data[2] = (byte) 0xBF;
155 System.arraycopy(baseData, 0, data, 3, baseData.length);
156 }
157 return new ByteArrayInputStream(data);
158 }
159
160 private boolean doesSaxSupportCharacterSet(final String charsetName) throws ParserConfigurationException, SAXException, IOException {
161 final DocumentBuilder documentBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
162 try (InputStream byteStream = CharSequenceInputStream.builder().setCharSequence("<?xml version=\"1.0\" encoding=\"" + charsetName + "\"?><Z/>")
163 .setCharset(charsetName).get()) {
164 final InputSource is = new InputSource(byteStream);
165 is.setEncoding(charsetName);
166 documentBuilder.parse(is);
167 } catch (final SAXParseException e) {
168 if (e.getMessage().contains(charsetName)) {
169 return false;
170 }
171 }
172 return true;
173 }
174
175 private boolean jvmAndSaxBothSupportCharset(final String charSetName) throws ParserConfigurationException, SAXException, IOException {
176 return Charset.isSupported(charSetName) && doesSaxSupportCharacterSet(charSetName);
177 }
178
179 private void parseXml(final InputStream in) throws SAXException, IOException, ParserConfigurationException {
180 final Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(new InputSource(in));
181 assertNotNull(doc);
182 assertEquals("X", doc.getFirstChild().getNodeName());
183 }
184
185 private void parseXml(final Reader in) throws SAXException, IOException, ParserConfigurationException {
186 final Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(new InputSource(in));
187 assertNotNull(doc);
188 assertEquals("X", doc.getFirstChild().getNodeName());
189 }
190
191 private void readBOMInputStreamTwice(final String resource) throws Exception {
192 try (InputStream inputStream = this.getClass().getResourceAsStream(resource)) {
193 assertNotNull(inputStream);
194 try (BOMInputStream bomInputStream = BOMInputStream.builder().setInputStream(inputStream).get()) {
195 bomInputStream.mark(1_000_000);
196
197 readFile(bomInputStream);
198 bomInputStream.reset();
199 readFile(bomInputStream);
200 inputStream.close();
201 }
202 }
203 }
204
205 private void readFile(final BOMInputStream bomInputStream) throws Exception {
206 int bytes;
207 final byte[] bytesFromStream = new byte[100];
208 do {
209 bytes = bomInputStream.read(bytesFromStream);
210 } while (bytes > 0);
211 }
212
213 @Test
214 public void testAfterReadConsumer() throws Exception {
215 final byte[] data = { 'A', 'B', 'C', 'D' };
216 final AtomicBoolean boolRef = new AtomicBoolean();
217
218 try (InputStream bounded = BOMInputStream.builder()
219 .setInputStream(createUtf8Input(data, true))
220 .setAfterRead(i -> boolRef.set(true))
221 .get()) {
222 IOUtils.consume(bounded);
223 }
224
225 assertTrue(boolRef.get());
226
227 final String message = "test exception message";
228
229 try (InputStream bounded = BOMInputStream.builder()
230 .setInputStream(createUtf8Input(data, true))
231 .setAfterRead(i -> {
232 throw new CustomIOException(message);
233 })
234 .get()) {
235 assertEquals(message, assertThrowsExactly(CustomIOException.class, () -> IOUtils.consume(bounded)).getMessage());
236 }
237
238 }
239
240 @Test
241 public void testAvailableWithBOMAfterClose() throws Exception {
242 final byte[] data = { 'A', 'B', 'C', 'D' };
243 final InputStream shadow;
244 try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
245 assertEquals(7, in.available());
246 shadow = in;
247 }
248 assertEquals(0, shadow.available());
249 }
250
251 @Test
252 public void testAvailableWithBOMAfterOpen() throws Exception {
253 final byte[] data = { 'A', 'B', 'C', 'D' };
254 try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
255 assertEquals(7, in.available());
256 }
257 }
258
259 @Test
260 public void testAvailableWithoutBOM() throws Exception {
261 final byte[] data = { 'A', 'B', 'C', 'D' };
262 try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
263 assertEquals(4, in.available());
264 }
265 }
266
267 @Test
268 public void testBuilderGet() {
269
270 assertThrows(IllegalStateException.class, () -> BOMInputStream.builder().get());
271 }
272
273 @Test
274
275 public void testClose() throws Exception {
276 try (ExpectCloseInputStream del = new ExpectCloseInputStream()) {
277 try (InputStream in = new BOMInputStream(del)) {
278
279 }
280 del.assertCloseCalled();
281 }
282 }
283
284 @Test
285 public void testCloseHandleIOException() throws IOException {
286 ProxyInputStreamTest.testCloseHandleIOException(BOMInputStream.builder());
287 }
288
289 @Test
290 public void testEmptyBufferWithBOM() throws Exception {
291 final byte[] data = {};
292 try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
293 final byte[] buf = new byte[1024];
294 assertEquals(-1, in.read(buf));
295 }
296 }
297
298 @Test
299 public void testEmptyBufferWithoutBOM() throws Exception {
300 final byte[] data = {};
301 try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
302 final byte[] buf = new byte[1024];
303 assertEquals(-1, in.read(buf));
304 }
305 }
306
307 @Test
308 public void testGetBOMFirstThenRead() throws Exception {
309 final byte[] data = { 'A', 'B', 'C' };
310 try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
311 assertEquals(ByteOrderMark.UTF_8, in.getBOM(), "getBOM");
312 assertTrue(in.hasBOM(), "hasBOM()");
313 assertTrue(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
314 assertEquals('A', in.read());
315 assertEquals('B', in.read());
316 assertEquals('C', in.read());
317 assertEquals(-1, in.read());
318 }
319 }
320
321 @Test
322 public void testGetBOMFirstThenReadInclude() throws Exception {
323 final byte[] data = { 'A', 'B', 'C' };
324 try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).setInclude(true).get()) {
325 assertTrue(in.hasBOM(), "hasBOM()");
326 assertTrue(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
327 assertEquals(ByteOrderMark.UTF_8, in.getBOM(), "getBOM");
328 assertEquals(0xEF, in.read());
329 assertEquals(0xBB, in.read());
330 assertEquals(0xBF, in.read());
331 assertEquals('A', in.read());
332 assertEquals('B', in.read());
333 assertEquals('C', in.read());
334 assertEquals(-1, in.read());
335 }
336 }
337
338 @Test
339 public void testLargeBufferWithBOM() throws Exception {
340 final byte[] data = { 'A', 'B', 'C' };
341 try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
342 final byte[] buf = new byte[1024];
343 assertData(data, buf, in.read(buf));
344 }
345 }
346
347 @Test
348 public void testLargeBufferWithoutBOM() throws Exception {
349 final byte[] data = { 'A', 'B', 'C' };
350 try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
351 final byte[] buf = new byte[1024];
352 assertData(data, buf, in.read(buf));
353 }
354 }
355
356 @Test
357 public void testLeadingNonBOMBufferedRead() throws Exception {
358 final byte[] data = { (byte) 0xEF, (byte) 0xAB, (byte) 0xCD };
359 try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
360 final byte[] buf = new byte[1024];
361 assertData(data, buf, in.read(buf));
362 }
363 }
364
365 @Test
366 public void testLeadingNonBOMSingleRead() throws Exception {
367 final byte[] data = { (byte) 0xEF, (byte) 0xAB, (byte) 0xCD };
368 try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
369 assertEquals(0xEF, in.read());
370 assertEquals(0xAB, in.read());
371 assertEquals(0xCD, in.read());
372 assertEquals(-1, in.read());
373 }
374 }
375
376 @Test
377 public void testMarkResetAfterReadWithBOM() throws Exception {
378 final byte[] data = { 'A', 'B', 'C', 'D' };
379 try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
380 assertTrue(in.markSupported());
381
382 in.read();
383 in.mark(10);
384
385 in.read();
386 in.read();
387 in.reset();
388 assertEquals('B', in.read());
389 }
390 }
391
392 @Test
393 public void testMarkResetAfterReadWithoutBOM() throws Exception {
394 final byte[] data = { 'A', 'B', 'C', 'D' };
395 try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
396 assertTrue(in.markSupported());
397
398 in.read();
399 in.mark(10);
400
401 in.read();
402 in.read();
403 in.reset();
404 assertEquals('B', in.read());
405 }
406 }
407
408 @Test
409 public void testMarkResetBeforeReadWithBOM() throws Exception {
410 final byte[] data = { 'A', 'B', 'C', 'D' };
411 try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
412 assertTrue(in.markSupported());
413
414 in.mark(10);
415
416 in.read();
417 in.read();
418 in.reset();
419 assertEquals('A', in.read());
420 }
421 }
422
423 @Test
424 public void testMarkResetBeforeReadWithoutBOM() throws Exception {
425 final byte[] data = { 'A', 'B', 'C', 'D' };
426 try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
427 assertTrue(in.markSupported());
428
429 in.mark(10);
430
431 in.read();
432 in.read();
433 in.reset();
434 assertEquals('A', in.read());
435 }
436 }
437
438 @Test
439 public void testNoBoms() throws Exception {
440 final byte[] data = { 'A', 'B', 'C' };
441 assertThrows(IllegalArgumentException.class, () -> new BOMInputStream(createUtf8Input(data, true), false, (ByteOrderMark[]) null).close());
442 assertThrows(IllegalArgumentException.class, () -> new BOMInputStream(createUtf8Input(data, true), false, new ByteOrderMark[0]).close());
443
444 try (BOMInputStream bomInputStream = BOMInputStream.builder()
445 .setInputStream(createUtf8Input(data, true))
446 .setInclude(true)
447 .setByteOrderMarks((ByteOrderMark[]) null)
448 .get()) {
449 assertEquals(BOMInputStream.Builder.getDefaultByteOrderMark(), bomInputStream.getBOM());
450 }
451 assertThrows(IllegalArgumentException.class, () -> BOMInputStream.builder()
452 .setInputStream(createUtf8Input(data, true))
453 .setInclude(true)
454 .setByteOrderMarks()
455 .get()
456 .close());
457 }
458
459 @Test
460 public void testReadAfterClose() throws Exception {
461 final byte[] data = { 'A', 'B', 'C', 'D' };
462 try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
463 assertEquals(7, in.available());
464 in.close();
465 assertThrows(IOException.class, in::read);
466 }
467 }
468
469 @Test
470 public void testReadEmpty() throws Exception {
471 final byte[] data = {};
472 try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
473 assertEquals(-1, in.read());
474 assertFalse(in.hasBOM(), "hasBOM()");
475 assertFalse(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
476 assertNull(in.getBOM(), "getBOM");
477 }
478 }
479
480 @Test
481 public void testReadSmall() throws Exception {
482 final byte[] data = { 'A', 'B' };
483 try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
484 assertEquals('A', in.read());
485 assertEquals('B', in.read());
486 assertEquals(-1, in.read());
487 assertFalse(in.hasBOM(), "hasBOM()");
488 assertFalse(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
489 assertNull(in.getBOM(), "getBOM");
490 }
491 }
492
493 @Test
494 public void testReadTwiceWithBOM() throws Exception {
495 readBOMInputStreamTwice("/org/apache/commons/io/testfileBOM.xml");
496 }
497
498 @Test
499 public void testReadTwiceWithoutBOM() throws Exception {
500 readBOMInputStreamTwice("/org/apache/commons/io/testfileNoBOM.xml");
501 }
502
503 @Test
504 public void testReadWithBOMInclude() throws Exception {
505 final byte[] data = { 'A', 'B', 'C' };
506 try (BOMInputStream in = new BOMInputStream(createUtf8Input(data, true), true)) {
507 assertEquals(0xEF, in.read());
508 assertEquals(0xBB, in.read());
509 assertEquals(0xBF, in.read());
510 assertEquals('A', in.read());
511 assertEquals('B', in.read());
512 assertEquals('C', in.read());
513 assertEquals(-1, in.read());
514 assertTrue(in.hasBOM(), "hasBOM()");
515 assertTrue(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
516 assertEquals(ByteOrderMark.UTF_8, in.getBOM(), "getBOM");
517 }
518 }
519
520 @Test
521 public void testReadWithBOMUtf16Be() throws Exception {
522 final byte[] data = "ABC".getBytes(StandardCharsets.UTF_16BE);
523 try (BOMInputStream in = new BOMInputStream(createUtf16BeDataStream(data, true), ByteOrderMark.UTF_16BE)) {
524 assertEquals(0, in.read());
525 assertEquals('A', in.read());
526 assertEquals(0, in.read());
527 assertEquals('B', in.read());
528 assertEquals(0, in.read());
529 assertEquals('C', in.read());
530 assertEquals(-1, in.read());
531 assertTrue(in.hasBOM(), "hasBOM()");
532 assertTrue(in.hasBOM(ByteOrderMark.UTF_16BE), "hasBOM(UTF-16BE)");
533 assertEquals(ByteOrderMark.UTF_16BE, in.getBOM(), "getBOM");
534 assertThrows(IllegalArgumentException.class, () -> in.hasBOM(ByteOrderMark.UTF_16LE));
535 }
536 }
537
538 @Test
539 public void testReadWithBOMUtf16Le() throws Exception {
540 final byte[] data = "ABC".getBytes(StandardCharsets.UTF_16LE);
541 try (BOMInputStream in = new BOMInputStream(createUtf16LeDataStream(data, true), ByteOrderMark.UTF_16LE)) {
542 assertEquals('A', in.read());
543 assertEquals(0, in.read());
544 assertEquals('B', in.read());
545 assertEquals(0, in.read());
546 assertEquals('C', in.read());
547 assertEquals(0, in.read());
548 assertEquals(-1, in.read());
549 assertTrue(in.hasBOM(), "hasBOM()");
550 assertTrue(in.hasBOM(ByteOrderMark.UTF_16LE), "hasBOM(UTF-16LE)");
551 assertEquals(ByteOrderMark.UTF_16LE, in.getBOM(), "getBOM");
552 assertThrows(IllegalArgumentException.class, () -> in.hasBOM(ByteOrderMark.UTF_16BE));
553 }
554 }
555
556 @Test
557 public void testReadWithBOMUtf32Be() throws Exception {
558 assumeTrue(Charset.isSupported("UTF_32BE"));
559 final byte[] data = "ABC".getBytes("UTF_32BE");
560 try (BOMInputStream in = new BOMInputStream(createUtf32BeDataStream(data, true),
561 ByteOrderMark.UTF_32BE)) {
562 assertEquals(0, in.read());
563 assertEquals(0, in.read());
564 assertEquals(0, in.read());
565 assertEquals('A', in.read());
566 assertEquals(0, in.read());
567 assertEquals(0, in.read());
568 assertEquals(0, in.read());
569 assertEquals('B', in.read());
570 assertEquals(0, in.read());
571 assertEquals(0, in.read());
572 assertEquals(0, in.read());
573 assertEquals('C', in.read());
574 assertEquals(-1, in.read());
575 assertTrue(in.hasBOM(), "hasBOM()");
576 assertTrue(in.hasBOM(ByteOrderMark.UTF_32BE), "hasBOM(UTF-32BE)");
577 assertEquals(ByteOrderMark.UTF_32BE, in.getBOM(), "getBOM");
578 assertThrows(IllegalArgumentException.class, () -> in.hasBOM(ByteOrderMark.UTF_32LE));
579 }
580 }
581
582 @Test
583 public void testReadWithBOMUtf32Le() throws Exception {
584 assumeTrue(Charset.isSupported("UTF_32LE"));
585 final byte[] data = "ABC".getBytes("UTF_32LE");
586 try (BOMInputStream in = new BOMInputStream(createUtf32LeDataStream(data, true),
587 ByteOrderMark.UTF_32LE)) {
588 assertEquals('A', in.read());
589 assertEquals(0, in.read());
590 assertEquals(0, in.read());
591 assertEquals(0, in.read());
592 assertEquals('B', in.read());
593 assertEquals(0, in.read());
594 assertEquals(0, in.read());
595 assertEquals(0, in.read());
596 assertEquals('C', in.read());
597 assertEquals(0, in.read());
598 assertEquals(0, in.read());
599 assertEquals(0, in.read());
600 assertEquals(-1, in.read());
601 assertTrue(in.hasBOM(), "hasBOM()");
602 assertTrue(in.hasBOM(ByteOrderMark.UTF_32LE), "hasBOM(UTF-32LE)");
603 assertEquals(ByteOrderMark.UTF_32LE, in.getBOM(), "getBOM");
604 assertThrows(IllegalArgumentException.class, () -> in.hasBOM(ByteOrderMark.UTF_32BE));
605 }
606 }
607
608 @Test
609 public void testReadWithBOMUtf8() throws Exception {
610 final byte[] data = "ABC".getBytes(StandardCharsets.UTF_8);
611 try (BOMInputStream in = new BOMInputStream(createUtf8Input(data, true), ByteOrderMark.UTF_8)) {
612 assertEquals('A', in.read());
613 assertEquals('B', in.read());
614 assertEquals('C', in.read());
615 assertEquals(-1, in.read());
616 assertTrue(in.hasBOM(), "hasBOM()");
617 assertTrue(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
618 assertEquals(ByteOrderMark.UTF_8, in.getBOM(), "getBOM");
619 assertThrows(IllegalArgumentException.class, () -> in.hasBOM(ByteOrderMark.UTF_16BE));
620 }
621 }
622
623 @Test
624 public void testReadWithMultipleBOM() throws Exception {
625 final byte[] data = { 'A', 'B', 'C' };
626 try (BOMInputStream in = new BOMInputStream(createUtf8Input(data, true), ByteOrderMark.UTF_16BE,
627 ByteOrderMark.UTF_8)) {
628 assertEquals('A', in.read());
629 assertEquals('B', in.read());
630 assertEquals('C', in.read());
631 assertEquals(-1, in.read());
632 assertTrue(in.hasBOM(), "hasBOM()");
633 assertTrue(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
634 assertFalse(in.hasBOM(ByteOrderMark.UTF_16BE), "hasBOM(UTF-16BE)");
635 assertEquals(ByteOrderMark.UTF_8, in.getBOM(), "getBOM");
636 }
637 }
638
639 @Test
640 public void testReadWithoutBOM() throws Exception {
641 final byte[] data = { 'A', 'B', 'C' };
642 try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
643 assertEquals('A', in.read());
644 assertEquals('B', in.read());
645 assertEquals('C', in.read());
646 assertEquals(-1, in.read());
647 assertFalse(in.hasBOM(), "hasBOM()");
648 assertFalse(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
649 assertNull(in.getBOM(), "getBOM");
650 }
651 }
652
653 @Test
654 public void testReadXmlWithBOMUcs2() throws Exception {
655 assumeFalse(SystemProperties.getJavaVendor().contains("IBM"), "This test does not pass on some IBM VMs xml parsers");
656
657
658 assumeTrue(Charset.isSupported("ISO-10646-UCS-2"));
659 final byte[] data = "<?xml version=\"1.0\" encoding=\"ISO-10646-UCS-2\"?><X/>".getBytes("ISO-10646-UCS-2");
660 try (BOMInputStream in = new BOMInputStream(createUtf16BeDataStream(data, true), ByteOrderMark.UTF_16BE)) {
661 parseXml(in);
662 }
663 parseXml(createUtf16BeDataStream(data, true));
664 }
665
666 @Test
667 public void testReadXmlWithBOMUcs4() throws Exception {
668
669
670 assumeTrue(Charset.isSupported("ISO-10646-UCS-4"));
671 final byte[] data = "<?xml version=\"1.0\" encoding=\"ISO-10646-UCS-4\"?><X/>".getBytes("ISO-10646-UCS-4");
672
673 try (BOMInputStream in = new BOMInputStream(createUtf32BeDataStream(data, true), ByteOrderMark.UTF_32BE)) {
674 parseXml(in);
675
676 assumeTrue(jvmAndSaxBothSupportCharset("UTF_32LE"), "JVM and SAX need to support UTF_32LE for this");
677 }
678 parseXml(createUtf32BeDataStream(data, true));
679 }
680
681 @Test
682 public void testReadXmlWithBOMUtf16Be() throws Exception {
683 final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-16BE\"?><X/>".getBytes(StandardCharsets.UTF_16BE);
684 try (BOMInputStream in = new BOMInputStream(createUtf16BeDataStream(data, true), ByteOrderMark.UTF_16BE)) {
685 parseXml(in);
686 }
687 parseXml(createUtf16BeDataStream(data, true));
688 }
689
690 @Test
691 public void testReadXmlWithBOMUtf16Le() throws Exception {
692 final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-16LE\"?><X/>".getBytes(StandardCharsets.UTF_16LE);
693 try (BOMInputStream in = new BOMInputStream(createUtf16LeDataStream(data, true), ByteOrderMark.UTF_16LE)) {
694 parseXml(in);
695 }
696 parseXml(createUtf16LeDataStream(data, true));
697 }
698
699 @Test
700 public void testReadXmlWithBOMUtf32Be() throws Exception {
701 assumeTrue(jvmAndSaxBothSupportCharset("UTF_32BE"), "JVM and SAX need to support UTF_32BE for this");
702 final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-32BE\"?><X/>".getBytes("UTF_32BE");
703 try (BOMInputStream in = new BOMInputStream(createUtf32BeDataStream(data, true), ByteOrderMark.UTF_32BE)) {
704 parseXml(in);
705 }
706
707 try (XmlStreamReader in = new XmlStreamReader(createUtf32BeDataStream(data, true))) {
708 parseXml(in);
709 }
710 }
711
712 @Test
713 public void testReadXmlWithBOMUtf32Le() throws Exception {
714 assumeTrue(jvmAndSaxBothSupportCharset("UTF_32LE"), "JVM and SAX need to support UTF_32LE for this");
715 final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-32LE\"?><X/>".getBytes("UTF_32LE");
716 try (BOMInputStream in = new BOMInputStream(createUtf32LeDataStream(data, true), ByteOrderMark.UTF_32LE)) {
717 parseXml(in);
718 }
719
720 try (XmlStreamReader in = new XmlStreamReader(createUtf32LeDataStream(data, true))) {
721 parseXml(in);
722 }
723 }
724
725 @Test
726 public void testReadXmlWithBOMUtf8() throws Exception {
727 final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><X/>".getBytes(StandardCharsets.UTF_8);
728 try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
729 parseXml(in);
730 }
731 parseXml(createUtf8Input(data, true));
732 }
733
734 @Test
735 public void testReadXmlWithoutBOMUtf32Be() throws Exception {
736 assumeTrue(jvmAndSaxBothSupportCharset("UTF_32BE"), "JVM and SAX need to support UTF_32BE for this");
737 final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF_32BE\"?><X/>".getBytes("UTF_32BE");
738 try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
739 parseXml(in);
740 }
741 parseXml(createUtf32BeDataStream(data, false));
742 }
743
744 @Test
745 public void testReadXmlWithoutBOMUtf32Le() throws Exception {
746 assumeTrue(jvmAndSaxBothSupportCharset("UTF_32LE"), "JVM and SAX need to support UTF_32LE for this");
747 final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-32LE\"?><X/>".getBytes("UTF_32LE");
748 try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
749 parseXml(in);
750 }
751 parseXml(createUtf32BeDataStream(data, false));
752 }
753
754 @Test
755 public void testSkipReturnValueWithBom() throws IOException {
756 final byte[] data = { (byte) 0x31, (byte) 0x32, (byte) 0x33 };
757 try (BOMInputStream is1 = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
758 assertEquals(2, is1.skip(2));
759 assertEquals((byte) 0x33, is1.read());
760 }
761 }
762
763 @Test
764 public void testSkipReturnValueWithoutBom() throws IOException {
765 final byte[] data = { (byte) 0x31, (byte) 0x32, (byte) 0x33 };
766 try (BOMInputStream is2 = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
767 assertEquals(2, is2.skip(2));
768 assertEquals((byte) 0x33, is2.read());
769 }
770 }
771
772 @Test
773 public void testSkipWithBOM() throws Exception {
774 final byte[] data = { 'A', 'B', 'C', 'D' };
775 try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
776 in.skip(2L);
777 assertEquals('C', in.read());
778 }
779 }
780
781 @Test
782 public void testSkipWithoutBOM() throws Exception {
783 final byte[] data = { 'A', 'B', 'C', 'D' };
784 try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
785 in.skip(2L);
786 assertEquals('C', in.read());
787 }
788 }
789
790 @Test
791 public void testSmallBufferWithBOM() throws Exception {
792 final byte[] data = { 'A', 'B', 'C' };
793 try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
794 final byte[] buf = new byte[1024];
795 assertData(new byte[] { 'A', 'B' }, buf, in.read(buf, 0, 2));
796 assertData(new byte[] { 'C' }, buf, in.read(buf, 0, 2));
797 }
798 }
799
800 @Test
801 public void testSmallBufferWithoutBOM() throws Exception {
802 final byte[] data = { 'A', 'B', 'C' };
803 try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
804 final byte[] buf = new byte[1024];
805 assertData(new byte[] { 'A', 'B' }, buf, in.read(buf, 0, 2));
806 assertData(new byte[] { 'C' }, buf, in.read(buf, 0, 2));
807 }
808 }
809
810 @Test
811
812 public void testSupportCode() throws Exception {
813 try (InputStream in = createUtf8Input(new byte[] { 'A', 'B' }, true)) {
814 final byte[] buf = new byte[1024];
815 final int len = in.read(buf);
816 assertEquals(5, len);
817 assertEquals(0xEF, buf[0] & 0xFF);
818 assertEquals(0xBB, buf[1] & 0xFF);
819 assertEquals(0xBF, buf[2] & 0xFF);
820 assertEquals('A', buf[3] & 0xFF);
821 assertEquals('B', buf[4] & 0xFF);
822
823 assertData(new byte[] { (byte) 0xEF, (byte) 0xBB, (byte) 0xBF, 'A', 'B' }, buf, len);
824 }
825 }
826 }