1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.commons.io.input;
18
19 import static org.junit.jupiter.api.Assertions.assertEquals;
20 import static org.junit.jupiter.api.Assertions.assertFalse;
21 import static org.junit.jupiter.api.Assertions.assertNotNull;
22 import static org.junit.jupiter.api.Assertions.assertNull;
23 import static org.junit.jupiter.api.Assertions.assertThrows;
24 import static org.junit.jupiter.api.Assertions.assertThrowsExactly;
25 import static org.junit.jupiter.api.Assertions.assertTrue;
26 import static org.junit.jupiter.api.Assumptions.assumeFalse;
27 import static org.junit.jupiter.api.Assumptions.assumeTrue;
28
29 import java.io.ByteArrayInputStream;
30 import java.io.IOException;
31 import java.io.InputStream;
32 import java.io.Reader;
33 import java.nio.charset.Charset;
34 import java.nio.charset.StandardCharsets;
35 import java.util.concurrent.atomic.AtomicBoolean;
36
37 import javax.xml.parsers.DocumentBuilder;
38 import javax.xml.parsers.DocumentBuilderFactory;
39 import javax.xml.parsers.ParserConfigurationException;
40
41 import org.apache.commons.io.ByteOrderMark;
42 import org.apache.commons.io.IOUtils;
43 import org.apache.commons.io.test.CustomIOException;
44 import org.apache.commons.lang3.SystemProperties;
45 import org.junit.jupiter.api.Test;
46 import org.w3c.dom.Document;
47 import org.xml.sax.InputSource;
48 import org.xml.sax.SAXException;
49 import org.xml.sax.SAXParseException;
50
51
52
53
54 @SuppressWarnings("ResultOfMethodCallIgnored")
55 class BOMInputStreamTest {
56
57
58
59
60 private static final class ExpectCloseInputStream extends InputStream {
61 private boolean closed;
62
63 public void assertCloseCalled() {
64 assertTrue(closed);
65 }
66
67 @Override
68 public void close() throws IOException {
69 closed = true;
70 }
71
72 @Override
73 public int read() throws IOException {
74 return -1;
75 }
76 }
77
78 private void assertData(final byte[] expected, final byte[] actual, final int len) {
79 assertEquals(expected.length, len, "length");
80 for (int ii = 0; ii < expected.length; ii++) {
81 assertEquals(expected[ii], actual[ii], "byte " + ii);
82 }
83 }
84
85
86
87
88 private InputStream createUtf16BeDataStream(final byte[] baseData, final boolean addBOM) {
89 byte[] data = baseData;
90 if (addBOM) {
91 data = new byte[baseData.length + 2];
92 data[0] = (byte) 0xFE;
93 data[1] = (byte) 0xFF;
94 System.arraycopy(baseData, 0, data, 2, baseData.length);
95 }
96 return new ByteArrayInputStream(data);
97 }
98
99
100
101
102 private InputStream createUtf16LeDataStream(final byte[] baseData, final boolean addBOM) {
103 byte[] data = baseData;
104 if (addBOM) {
105 data = new byte[baseData.length + 2];
106 data[0] = (byte) 0xFF;
107 data[1] = (byte) 0xFE;
108 System.arraycopy(baseData, 0, data, 2, baseData.length);
109 }
110 return new ByteArrayInputStream(data);
111 }
112
113
114
115
116 private InputStream createUtf32BeDataStream(final byte[] baseData, final boolean addBOM) {
117 byte[] data = baseData;
118 if (addBOM) {
119 data = new byte[baseData.length + 4];
120 data[0] = 0;
121 data[1] = 0;
122 data[2] = (byte) 0xFE;
123 data[3] = (byte) 0xFF;
124 System.arraycopy(baseData, 0, data, 4, baseData.length);
125 }
126 return new ByteArrayInputStream(data);
127 }
128
129
130
131
132 private InputStream createUtf32LeDataStream(final byte[] baseData, final boolean addBOM) {
133 byte[] data = baseData;
134 if (addBOM) {
135 data = new byte[baseData.length + 4];
136 data[0] = (byte) 0xFF;
137 data[1] = (byte) 0xFE;
138 data[2] = 0;
139 data[3] = 0;
140 System.arraycopy(baseData, 0, data, 4, baseData.length);
141 }
142 return new ByteArrayInputStream(data);
143 }
144
145
146
147
148 private InputStream createUtf8Input(final byte[] baseData, final boolean addBOM) {
149 byte[] data = baseData;
150 if (addBOM) {
151 data = new byte[baseData.length + 3];
152 data[0] = (byte) 0xEF;
153 data[1] = (byte) 0xBB;
154 data[2] = (byte) 0xBF;
155 System.arraycopy(baseData, 0, data, 3, baseData.length);
156 }
157 return new ByteArrayInputStream(data);
158 }
159
160 private boolean doesSaxSupportCharacterSet(final String charsetName) throws ParserConfigurationException, SAXException, IOException {
161 final DocumentBuilder documentBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
162 try (InputStream byteStream = CharSequenceInputStream.builder().setCharSequence("<?xml version=\"1.0\" encoding=\"" + charsetName + "\"?><Z/>")
163 .setCharset(charsetName).get()) {
164 final InputSource is = new InputSource(byteStream);
165 is.setEncoding(charsetName);
166 documentBuilder.parse(is);
167 } catch (final SAXParseException e) {
168 if (e.getMessage().contains(charsetName)) {
169 return false;
170 }
171 }
172 return true;
173 }
174
175 private boolean jvmAndSaxBothSupportCharset(final String charSetName) throws ParserConfigurationException, SAXException, IOException {
176 return Charset.isSupported(charSetName) && doesSaxSupportCharacterSet(charSetName);
177 }
178
179 private void parseXml(final InputStream in) throws SAXException, IOException, ParserConfigurationException {
180 final Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(new InputSource(in));
181 assertNotNull(doc);
182 assertEquals("X", doc.getFirstChild().getNodeName());
183 }
184
185 private void parseXml(final Reader in) throws SAXException, IOException, ParserConfigurationException {
186 final Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(new InputSource(in));
187 assertNotNull(doc);
188 assertEquals("X", doc.getFirstChild().getNodeName());
189 }
190
191 private void readBOMInputStreamTwice(final String resource) throws Exception {
192 try (InputStream inputStream = this.getClass().getResourceAsStream(resource)) {
193 assertNotNull(inputStream);
194 try (BOMInputStream bomInputStream = BOMInputStream.builder().setInputStream(inputStream).get()) {
195 bomInputStream.mark(1_000_000);
196 readFile(bomInputStream);
197 bomInputStream.reset();
198 readFile(bomInputStream);
199 inputStream.close();
200 }
201 }
202 }
203
204 private void readFile(final BOMInputStream bomInputStream) throws Exception {
205 int bytes;
206 final byte[] bytesFromStream = new byte[100];
207 do {
208 bytes = bomInputStream.read(bytesFromStream);
209 } while (bytes > 0);
210 }
211
212 @Test
213 void testAfterReadConsumer() throws Exception {
214 final byte[] data = { 'A', 'B', 'C', 'D' };
215 final AtomicBoolean boolRef = new AtomicBoolean();
216
217 try (BOMInputStream bounded = BOMInputStream.builder()
218 .setInputStream(createUtf8Input(data, true))
219 .setAfterRead(i -> boolRef.set(true))
220 .get()) {
221 IOUtils.consume(bounded);
222 }
223
224 assertTrue(boolRef.get());
225
226 final String message = "test exception message";
227
228 assertEquals(message, assertThrowsExactly(CustomIOException.class, () -> BOMInputStream.builder()
229 .setInputStream(createUtf8Input(data, true))
230 .setAfterRead(i -> {
231 throw new CustomIOException(message);
232 })
233 .get()).getMessage());
234
235 }
236
237 @Test
238 void testAvailableWithBOMAfterClose() throws Exception {
239 final byte[] data = { 'A', 'B', 'C', 'D' };
240 final InputStream shadow;
241 try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
242 assertEquals(4, in.available());
243 shadow = in;
244 }
245 assertEquals(0, shadow.available());
246 }
247
248 @Test
249 void testAvailableWithBOMAfterOpen() throws Exception {
250 final byte[] data = { 'A', 'B', 'C', 'D' };
251 try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
252 assertEquals(4, in.available());
253 }
254 }
255
256 @Test
257 void testAvailableWithoutBOM() throws Exception {
258 final byte[] data = { 'A', 'B', 'C', 'D' };
259 try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
260 assertEquals(1, in.available());
261 }
262 }
263
264 @Test
265 void testBuilderGet() {
266
267 assertThrows(IllegalStateException.class, () -> BOMInputStream.builder().get());
268 }
269
270 @Test
271
272 void testClose() throws Exception {
273 try (ExpectCloseInputStream del = new ExpectCloseInputStream()) {
274 try (BOMInputStream in = new BOMInputStream(del)) {
275
276 }
277 del.assertCloseCalled();
278 }
279 }
280
281 @Test
282 void testCloseHandleIOException() throws IOException {
283 final IOException exception = new IOException();
284 ProxyInputStreamTest.testCloseHandleIOException(BOMInputStream.builder().setInputStream(new BrokenInputStream(() -> exception) {
285
286 @Override
287 public int read() throws IOException {
288 return 'X';
289 }
290 }).get());
291 }
292
293 @Test
294 void testEmptyBufferWithBOM() throws Exception {
295 final byte[] data = {};
296 try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
297 final byte[] buf = new byte[1024];
298 assertEquals(-1, in.read(buf));
299 }
300 }
301
302 @Test
303 void testEmptyBufferWithoutBOM() throws Exception {
304 final byte[] data = {};
305 try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
306 final byte[] buf = new byte[1024];
307 assertEquals(-1, in.read(buf));
308 }
309 }
310
311 @Test
312 void testGetBOMFirstThenRead() throws Exception {
313 final byte[] data = { 'A', 'B', 'C' };
314 try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
315 assertEquals(ByteOrderMark.UTF_8, in.getBOM(), "getBOM");
316 assertTrue(in.hasBOM(), "hasBOM()");
317 assertTrue(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
318 assertEquals('A', in.read());
319 assertEquals('B', in.read());
320 assertEquals('C', in.read());
321 assertEquals(-1, in.read());
322 }
323 }
324
325 @Test
326 void testGetBOMFirstThenReadInclude() throws Exception {
327 final byte[] data = { 'A', 'B', 'C' };
328 try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).setInclude(true).get()) {
329 assertTrue(in.hasBOM(), "hasBOM()");
330 assertTrue(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
331 assertEquals(ByteOrderMark.UTF_8, in.getBOM(), "getBOM");
332 assertEquals(0xEF, in.read());
333 assertEquals(0xBB, in.read());
334 assertEquals(0xBF, in.read());
335 assertEquals('A', in.read());
336 assertEquals('B', in.read());
337 assertEquals('C', in.read());
338 assertEquals(-1, in.read());
339 }
340 }
341
342 @Test
343 void testLargeBufferWithBOM() throws Exception {
344 final byte[] data = { 'A', 'B', 'C' };
345 try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
346 final byte[] buf = new byte[1024];
347 assertData(data, buf, in.read(buf));
348 }
349 }
350
351 @Test
352 void testLargeBufferWithoutBOM() throws Exception {
353 final byte[] data = { 'A', 'B', 'C' };
354 try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
355 final byte[] buf = new byte[1024];
356 assertData(data, buf, in.read(buf));
357 }
358 }
359
360 @Test
361 void testLeadingNonBOMBufferedRead() throws Exception {
362 final byte[] data = { (byte) 0xEF, (byte) 0xAB, (byte) 0xCD };
363 try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
364 final byte[] buf = new byte[1024];
365 assertData(data, buf, in.read(buf));
366 }
367 }
368
369 @Test
370 void testLeadingNonBOMSingleRead() throws Exception {
371 final byte[] data = { (byte) 0xEF, (byte) 0xAB, (byte) 0xCD };
372 try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
373 assertEquals(0xEF, in.read());
374 assertEquals(0xAB, in.read());
375 assertEquals(0xCD, in.read());
376 assertEquals(-1, in.read());
377 }
378 }
379
380 @Test
381 void testMarkResetAfterReadWithBOM() throws Exception {
382 final byte[] data = { 'A', 'B', 'C', 'D' };
383 try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
384 assertTrue(in.markSupported());
385 in.read();
386 in.mark(10);
387 in.read();
388 in.read();
389 in.reset();
390 assertEquals('B', in.read());
391 }
392 }
393
394 @Test
395 void testMarkResetAfterReadWithoutBOM() throws Exception {
396 final byte[] data = { 'A', 'B', 'C', 'D' };
397 try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
398 assertTrue(in.markSupported());
399 in.read();
400 in.mark(10);
401 in.read();
402 in.read();
403 in.reset();
404 assertEquals('B', in.read());
405 }
406 }
407
408 @Test
409 void testMarkResetBeforeReadWithBOM() throws Exception {
410 final byte[] data = { 'A', 'B', 'C', 'D' };
411 try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
412 assertTrue(in.markSupported());
413 in.mark(10);
414 in.read();
415 in.read();
416 in.reset();
417 assertEquals('A', in.read());
418 }
419 }
420
421 @Test
422 void testMarkResetBeforeReadWithoutBOM() throws Exception {
423 final byte[] data = { 'A', 'B', 'C', 'D' };
424 try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
425 assertTrue(in.markSupported());
426 in.mark(10);
427 in.read();
428 in.read();
429 in.reset();
430 assertEquals('A', in.read());
431 }
432 }
433
434 @Test
435 void testNoBoms() throws Exception {
436 final byte[] data = { 'A', 'B', 'C' };
437 assertThrows(IllegalArgumentException.class, () -> new BOMInputStream(createUtf8Input(data, true), false, (ByteOrderMark[]) null).close());
438 assertThrows(IllegalArgumentException.class, () -> new BOMInputStream(createUtf8Input(data, true), false, new ByteOrderMark[0]).close());
439
440 try (BOMInputStream bomInputStream = BOMInputStream.builder()
441 .setInputStream(createUtf8Input(data, true))
442 .setInclude(true)
443 .setByteOrderMarks((ByteOrderMark[]) null)
444 .get()) {
445 assertEquals(BOMInputStream.Builder.getDefaultByteOrderMark(), bomInputStream.getBOM());
446 }
447 assertThrows(IllegalArgumentException.class, () -> BOMInputStream.builder()
448 .setInputStream(createUtf8Input(data, true))
449 .setInclude(true)
450 .setByteOrderMarks()
451 .get()
452 .close());
453 }
454
455 @Test
456 void testReadAfterClose() throws Exception {
457 final byte[] data = { 'A', 'B', 'C', 'D' };
458 try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
459 assertEquals(4, in.available());
460 in.close();
461 assertThrows(IOException.class, in::read);
462 }
463 }
464
465 @Test
466 void testReadEmpty() throws Exception {
467 final byte[] data = {};
468 try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
469 assertEquals(-1, in.read());
470 assertFalse(in.hasBOM(), "hasBOM()");
471 assertFalse(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
472 assertNull(in.getBOM(), "getBOM");
473 }
474 }
475
476 @Test
477 void testReadSmall() throws Exception {
478 final byte[] data = { 'A', 'B' };
479 try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
480 assertEquals('A', in.read());
481 assertEquals('B', in.read());
482 assertEquals(-1, in.read());
483 assertFalse(in.hasBOM(), "hasBOM()");
484 assertFalse(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
485 assertNull(in.getBOM(), "getBOM");
486 }
487 }
488
489 @Test
490 void testReadTwiceWithBOM() throws Exception {
491 readBOMInputStreamTwice("/org/apache/commons/io/testfileBOM.xml");
492 }
493
494 @Test
495 void testReadTwiceWithoutBOM() throws Exception {
496 readBOMInputStreamTwice("/org/apache/commons/io/testfileNoBOM.xml");
497 }
498
499 @Test
500 void testReadWithBOMInclude() throws Exception {
501 final byte[] data = { 'A', 'B', 'C' };
502 try (BOMInputStream in = new BOMInputStream(createUtf8Input(data, true), true)) {
503 assertEquals(0xEF, in.read());
504 assertEquals(0xBB, in.read());
505 assertEquals(0xBF, in.read());
506 assertEquals('A', in.read());
507 assertEquals('B', in.read());
508 assertEquals('C', in.read());
509 assertEquals(-1, in.read());
510 assertTrue(in.hasBOM(), "hasBOM()");
511 assertTrue(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
512 assertEquals(ByteOrderMark.UTF_8, in.getBOM(), "getBOM");
513 }
514 }
515
516 @Test
517 void testReadWithBOMUtf16Be() throws Exception {
518 final byte[] data = "ABC".getBytes(StandardCharsets.UTF_16BE);
519 try (BOMInputStream in = new BOMInputStream(createUtf16BeDataStream(data, true), ByteOrderMark.UTF_16BE)) {
520 assertEquals(0, in.read());
521 assertEquals('A', in.read());
522 assertEquals(0, in.read());
523 assertEquals('B', in.read());
524 assertEquals(0, in.read());
525 assertEquals('C', in.read());
526 assertEquals(-1, in.read());
527 assertTrue(in.hasBOM(), "hasBOM()");
528 assertTrue(in.hasBOM(ByteOrderMark.UTF_16BE), "hasBOM(UTF-16BE)");
529 assertEquals(ByteOrderMark.UTF_16BE, in.getBOM(), "getBOM");
530 assertThrows(IllegalArgumentException.class, () -> in.hasBOM(ByteOrderMark.UTF_16LE));
531 }
532 }
533
534 @Test
535 void testReadWithBOMUtf16Le() throws Exception {
536 final byte[] data = "ABC".getBytes(StandardCharsets.UTF_16LE);
537 try (BOMInputStream in = new BOMInputStream(createUtf16LeDataStream(data, true), ByteOrderMark.UTF_16LE)) {
538 assertEquals('A', in.read());
539 assertEquals(0, in.read());
540 assertEquals('B', in.read());
541 assertEquals(0, in.read());
542 assertEquals('C', in.read());
543 assertEquals(0, in.read());
544 assertEquals(-1, in.read());
545 assertTrue(in.hasBOM(), "hasBOM()");
546 assertTrue(in.hasBOM(ByteOrderMark.UTF_16LE), "hasBOM(UTF-16LE)");
547 assertEquals(ByteOrderMark.UTF_16LE, in.getBOM(), "getBOM");
548 assertThrows(IllegalArgumentException.class, () -> in.hasBOM(ByteOrderMark.UTF_16BE));
549 }
550 }
551
552 @Test
553 void testReadWithBOMUtf32Be() throws Exception {
554 assumeTrue(Charset.isSupported("UTF_32BE"));
555 final byte[] data = "ABC".getBytes("UTF_32BE");
556 try (BOMInputStream in = new BOMInputStream(createUtf32BeDataStream(data, true),
557 ByteOrderMark.UTF_32BE)) {
558 assertEquals(0, in.read());
559 assertEquals(0, in.read());
560 assertEquals(0, in.read());
561 assertEquals('A', in.read());
562 assertEquals(0, in.read());
563 assertEquals(0, in.read());
564 assertEquals(0, in.read());
565 assertEquals('B', in.read());
566 assertEquals(0, in.read());
567 assertEquals(0, in.read());
568 assertEquals(0, in.read());
569 assertEquals('C', in.read());
570 assertEquals(-1, in.read());
571 assertTrue(in.hasBOM(), "hasBOM()");
572 assertTrue(in.hasBOM(ByteOrderMark.UTF_32BE), "hasBOM(UTF-32BE)");
573 assertEquals(ByteOrderMark.UTF_32BE, in.getBOM(), "getBOM");
574 assertThrows(IllegalArgumentException.class, () -> in.hasBOM(ByteOrderMark.UTF_32LE));
575 }
576 }
577
578 @Test
579 void testReadWithBOMUtf32Le() throws Exception {
580 assumeTrue(Charset.isSupported("UTF_32LE"));
581 final byte[] data = "ABC".getBytes("UTF_32LE");
582 try (BOMInputStream in = new BOMInputStream(createUtf32LeDataStream(data, true),
583 ByteOrderMark.UTF_32LE)) {
584 assertEquals('A', in.read());
585 assertEquals(0, in.read());
586 assertEquals(0, in.read());
587 assertEquals(0, in.read());
588 assertEquals('B', in.read());
589 assertEquals(0, in.read());
590 assertEquals(0, in.read());
591 assertEquals(0, in.read());
592 assertEquals('C', in.read());
593 assertEquals(0, in.read());
594 assertEquals(0, in.read());
595 assertEquals(0, in.read());
596 assertEquals(-1, in.read());
597 assertTrue(in.hasBOM(), "hasBOM()");
598 assertTrue(in.hasBOM(ByteOrderMark.UTF_32LE), "hasBOM(UTF-32LE)");
599 assertEquals(ByteOrderMark.UTF_32LE, in.getBOM(), "getBOM");
600 assertThrows(IllegalArgumentException.class, () -> in.hasBOM(ByteOrderMark.UTF_32BE));
601 }
602 }
603
604 @Test
605 void testReadWithBOMUtf8() throws Exception {
606 final byte[] data = "ABC".getBytes(StandardCharsets.UTF_8);
607 try (BOMInputStream in = new BOMInputStream(createUtf8Input(data, true), ByteOrderMark.UTF_8)) {
608 assertEquals('A', in.read());
609 assertEquals('B', in.read());
610 assertEquals('C', in.read());
611 assertEquals(-1, in.read());
612 assertTrue(in.hasBOM(), "hasBOM()");
613 assertTrue(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
614 assertEquals(ByteOrderMark.UTF_8, in.getBOM(), "getBOM");
615 assertThrows(IllegalArgumentException.class, () -> in.hasBOM(ByteOrderMark.UTF_16BE));
616 }
617 }
618
619 @Test
620 void testReadWithMultipleBOM() throws Exception {
621 final byte[] data = { 'A', 'B', 'C' };
622 try (BOMInputStream in = new BOMInputStream(createUtf8Input(data, true), ByteOrderMark.UTF_16BE,
623 ByteOrderMark.UTF_8)) {
624 assertEquals('A', in.read());
625 assertEquals('B', in.read());
626 assertEquals('C', in.read());
627 assertEquals(-1, in.read());
628 assertTrue(in.hasBOM(), "hasBOM()");
629 assertTrue(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
630 assertFalse(in.hasBOM(ByteOrderMark.UTF_16BE), "hasBOM(UTF-16BE)");
631 assertEquals(ByteOrderMark.UTF_8, in.getBOM(), "getBOM");
632 }
633 }
634
635 @Test
636 void testReadWithoutBOM() throws Exception {
637 final byte[] data = { 'A', 'B', 'C' };
638 try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
639 assertEquals('A', in.read());
640 assertEquals('B', in.read());
641 assertEquals('C', in.read());
642 assertEquals(-1, in.read());
643 assertFalse(in.hasBOM(), "hasBOM()");
644 assertFalse(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
645 assertNull(in.getBOM(), "getBOM");
646 }
647 }
648
649 @Test
650 void testReadXmlWithBOMUcs2() throws Exception {
651 assumeFalse(SystemProperties.getJavaVendor().contains("IBM"), "This test does not pass on some IBM VMs XML parsers");
652
653
654 assumeTrue(Charset.isSupported("ISO-10646-UCS-2"));
655 final byte[] data = "<?xml version=\"1.0\" encoding=\"ISO-10646-UCS-2\"?><X/>".getBytes("ISO-10646-UCS-2");
656 try (BOMInputStream in = new BOMInputStream(createUtf16BeDataStream(data, true), ByteOrderMark.UTF_16BE)) {
657 parseXml(in);
658 }
659 parseXml(createUtf16BeDataStream(data, true));
660 }
661
662 @Test
663 void testReadXmlWithBOMUcs4() throws Exception {
664
665
666 assumeTrue(Charset.isSupported("ISO-10646-UCS-4"));
667 final byte[] data = "<?xml version=\"1.0\" encoding=\"ISO-10646-UCS-4\"?><X/>".getBytes("ISO-10646-UCS-4");
668
669 try (BOMInputStream in = new BOMInputStream(createUtf32BeDataStream(data, true), ByteOrderMark.UTF_32BE)) {
670 parseXml(in);
671
672 assumeTrue(jvmAndSaxBothSupportCharset("UTF_32LE"), "JVM and SAX need to support UTF_32LE for this");
673 }
674 parseXml(createUtf32BeDataStream(data, true));
675 }
676
677 @Test
678 void testReadXmlWithBOMUtf16Be() throws Exception {
679 final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-16BE\"?><X/>".getBytes(StandardCharsets.UTF_16BE);
680 try (BOMInputStream in = new BOMInputStream(createUtf16BeDataStream(data, true), ByteOrderMark.UTF_16BE)) {
681 parseXml(in);
682 }
683 parseXml(createUtf16BeDataStream(data, true));
684 }
685
686 @Test
687 void testReadXmlWithBOMUtf16Le() throws Exception {
688 final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-16LE\"?><X/>".getBytes(StandardCharsets.UTF_16LE);
689 try (BOMInputStream in = new BOMInputStream(createUtf16LeDataStream(data, true), ByteOrderMark.UTF_16LE)) {
690 parseXml(in);
691 }
692 parseXml(createUtf16LeDataStream(data, true));
693 }
694
695 @Test
696 void testReadXmlWithBOMUtf32Be() throws Exception {
697 assumeTrue(jvmAndSaxBothSupportCharset("UTF_32BE"), "JVM and SAX need to support UTF_32BE for this");
698 final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-32BE\"?><X/>".getBytes("UTF_32BE");
699 try (BOMInputStream in = new BOMInputStream(createUtf32BeDataStream(data, true), ByteOrderMark.UTF_32BE)) {
700 parseXml(in);
701 }
702
703 try (XmlStreamReader in = new XmlStreamReader(createUtf32BeDataStream(data, true))) {
704 parseXml(in);
705 }
706 }
707
708 @Test
709 void testReadXmlWithBOMUtf32Le() throws Exception {
710 assumeTrue(jvmAndSaxBothSupportCharset("UTF_32LE"), "JVM and SAX need to support UTF_32LE for this");
711 final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-32LE\"?><X/>".getBytes("UTF_32LE");
712 try (BOMInputStream in = new BOMInputStream(createUtf32LeDataStream(data, true), ByteOrderMark.UTF_32LE)) {
713 parseXml(in);
714 }
715
716 try (XmlStreamReader in = new XmlStreamReader(createUtf32LeDataStream(data, true))) {
717 parseXml(in);
718 }
719 }
720
721 @Test
722 void testReadXmlWithBOMUtf8() throws Exception {
723 final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><X/>".getBytes(StandardCharsets.UTF_8);
724 try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
725 parseXml(in);
726 }
727 parseXml(createUtf8Input(data, true));
728 }
729
730 @Test
731 void testReadXmlWithoutBOMUtf32Be() throws Exception {
732 assumeTrue(jvmAndSaxBothSupportCharset("UTF_32BE"), "JVM and SAX need to support UTF_32BE for this");
733 final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF_32BE\"?><X/>".getBytes("UTF_32BE");
734 try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
735 parseXml(in);
736 }
737 parseXml(createUtf32BeDataStream(data, false));
738 }
739
740 @Test
741 void testReadXmlWithoutBOMUtf32Le() throws Exception {
742 assumeTrue(jvmAndSaxBothSupportCharset("UTF_32LE"), "JVM and SAX need to support UTF_32LE for this");
743 final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-32LE\"?><X/>".getBytes("UTF_32LE");
744 try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
745 parseXml(in);
746 }
747 parseXml(createUtf32BeDataStream(data, false));
748 }
749
750 @Test
751 void testSkipReturnValueWithBom() throws IOException {
752 final byte[] data = { (byte) 0x31, (byte) 0x32, (byte) 0x33 };
753 try (BOMInputStream is1 = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
754 assertEquals(2, is1.skip(2));
755 assertEquals((byte) 0x33, is1.read());
756 }
757 }
758
759 @Test
760 void testSkipReturnValueWithoutBom() throws IOException {
761 final byte[] data = { (byte) 0x31, (byte) 0x32, (byte) 0x33 };
762 try (BOMInputStream is2 = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
763 assertEquals(2, is2.skip(2));
764 assertEquals((byte) 0x33, is2.read());
765 }
766 }
767
768 @Test
769 void testSkipWithBOM() throws Exception {
770 final byte[] data = { 'A', 'B', 'C', 'D' };
771 try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
772 in.skip(2L);
773 assertEquals('C', in.read());
774 }
775 }
776
777 @Test
778 void testSkipWithoutBOM() throws Exception {
779 final byte[] data = { 'A', 'B', 'C', 'D' };
780 try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
781 in.skip(2L);
782 assertEquals('C', in.read());
783 }
784 }
785
786 @Test
787 void testSmallBufferWithBOM() throws Exception {
788 final byte[] data = { 'A', 'B', 'C' };
789 try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
790 final byte[] buf = new byte[1024];
791 assertData(new byte[] { 'A', 'B' }, buf, in.read(buf, 0, 2));
792 assertData(new byte[] { 'C' }, buf, in.read(buf, 0, 2));
793 }
794 }
795
796 @Test
797 void testSmallBufferWithoutBOM() throws Exception {
798 final byte[] data = { 'A', 'B', 'C' };
799 try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
800 final byte[] buf = new byte[1024];
801 assertData(new byte[] { 'A', 'B' }, buf, in.read(buf, 0, 2));
802 assertData(new byte[] { 'C' }, buf, in.read(buf, 0, 2));
803 }
804 }
805
806 @Test
807
808 void testSupportCode() throws Exception {
809 try (InputStream in = createUtf8Input(new byte[] { 'A', 'B' }, true)) {
810 final byte[] buf = new byte[1024];
811 final int len = in.read(buf);
812 assertEquals(5, len);
813 assertEquals(0xEF, buf[0] & 0xFF);
814 assertEquals(0xBB, buf[1] & 0xFF);
815 assertEquals(0xBF, buf[2] & 0xFF);
816 assertEquals('A', buf[3] & 0xFF);
817 assertEquals('B', buf[4] & 0xFF);
818
819 assertData(new byte[] { (byte) 0xEF, (byte) 0xBB, (byte) 0xBF, 'A', 'B' }, buf, len);
820 }
821 }
822 }