1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.commons.io.input;
18
19 import static org.junit.jupiter.api.Assertions.assertEquals;
20 import static org.junit.jupiter.api.Assertions.assertFalse;
21 import static org.junit.jupiter.api.Assertions.assertNotNull;
22 import static org.junit.jupiter.api.Assertions.assertNull;
23 import static org.junit.jupiter.api.Assertions.assertThrows;
24 import static org.junit.jupiter.api.Assertions.assertTrue;
25 import static org.junit.jupiter.api.Assumptions.assumeFalse;
26 import static org.junit.jupiter.api.Assumptions.assumeTrue;
27
28 import java.io.ByteArrayInputStream;
29 import java.io.IOException;
30 import java.io.InputStream;
31 import java.io.Reader;
32 import java.nio.charset.Charset;
33 import java.nio.charset.StandardCharsets;
34
35 import javax.xml.parsers.DocumentBuilder;
36 import javax.xml.parsers.DocumentBuilderFactory;
37 import javax.xml.parsers.ParserConfigurationException;
38
39 import org.apache.commons.io.ByteOrderMark;
40 import org.junit.jupiter.api.Test;
41 import org.w3c.dom.Document;
42 import org.xml.sax.InputSource;
43 import org.xml.sax.SAXException;
44 import org.xml.sax.SAXParseException;
45
46
47
48
49 @SuppressWarnings("ResultOfMethodCallIgnored")
50 public class BOMInputStreamTest {
51
52
53
54
55 private static final class ExpectCloseInputStream extends InputStream {
56 private boolean _closeCalled;
57
58 public void assertCloseCalled() {
59 assertTrue(_closeCalled);
60 }
61
62 @Override
63 public void close() throws IOException {
64 _closeCalled = true;
65 }
66
67 @Override
68 public int read() throws IOException {
69 return -1;
70 }
71 }
72
73 private void assertData(final byte[] expected, final byte[] actual, final int len) {
74 assertEquals(expected.length, len, "length");
75 for (int ii = 0; ii < expected.length; ii++) {
76 assertEquals(expected[ii], actual[ii], "byte " + ii);
77 }
78 }
79
80
81
82
83 private InputStream createUtf16BeDataStream(final byte[] baseData, final boolean addBOM) {
84 byte[] data = baseData;
85 if (addBOM) {
86 data = new byte[baseData.length + 2];
87 data[0] = (byte) 0xFE;
88 data[1] = (byte) 0xFF;
89 System.arraycopy(baseData, 0, data, 2, baseData.length);
90 }
91 return new ByteArrayInputStream(data);
92 }
93
94
95
96
97 private InputStream createUtf16LeDataStream(final byte[] baseData, final boolean addBOM) {
98 byte[] data = baseData;
99 if (addBOM) {
100 data = new byte[baseData.length + 2];
101 data[0] = (byte) 0xFF;
102 data[1] = (byte) 0xFE;
103 System.arraycopy(baseData, 0, data, 2, baseData.length);
104 }
105 return new ByteArrayInputStream(data);
106 }
107
108
109
110
111 private InputStream createUtf32BeDataStream(final byte[] baseData, final boolean addBOM) {
112 byte[] data = baseData;
113 if (addBOM) {
114 data = new byte[baseData.length + 4];
115 data[0] = 0;
116 data[1] = 0;
117 data[2] = (byte) 0xFE;
118 data[3] = (byte) 0xFF;
119 System.arraycopy(baseData, 0, data, 4, baseData.length);
120 }
121 return new ByteArrayInputStream(data);
122 }
123
124
125
126
127 private InputStream createUtf32LeDataStream(final byte[] baseData, final boolean addBOM) {
128 byte[] data = baseData;
129 if (addBOM) {
130 data = new byte[baseData.length + 4];
131 data[0] = (byte) 0xFF;
132 data[1] = (byte) 0xFE;
133 data[2] = 0;
134 data[3] = 0;
135 System.arraycopy(baseData, 0, data, 4, baseData.length);
136 }
137 return new ByteArrayInputStream(data);
138 }
139
140
141
142
143 private InputStream createUtf8Input(final byte[] baseData, final boolean addBOM) {
144 byte[] data = baseData;
145 if (addBOM) {
146 data = new byte[baseData.length + 3];
147 data[0] = (byte) 0xEF;
148 data[1] = (byte) 0xBB;
149 data[2] = (byte) 0xBF;
150 System.arraycopy(baseData, 0, data, 3, baseData.length);
151 }
152 return new ByteArrayInputStream(data);
153 }
154
155 private boolean doesSaxSupportCharacterSet(final String charsetName) throws ParserConfigurationException, SAXException, IOException {
156 final DocumentBuilder documentBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
157 try (InputStream byteStream = CharSequenceInputStream.builder().setCharSequence("<?xml version=\"1.0\" encoding=\"" + charsetName + "\"?><Z/>")
158 .setCharset(charsetName).get()) {
159 final InputSource is = new InputSource(byteStream);
160 is.setEncoding(charsetName);
161 documentBuilder.parse(is);
162 } catch (final SAXParseException e) {
163 if (e.getMessage().contains(charsetName)) {
164 return false;
165 }
166 }
167 return true;
168 }
169
170 private boolean jvmAndSaxBothSupportCharset(final String charSetName) throws ParserConfigurationException, SAXException, IOException {
171 return Charset.isSupported(charSetName) && doesSaxSupportCharacterSet(charSetName);
172 }
173
174 private void parseXml(final InputStream in) throws SAXException, IOException, ParserConfigurationException {
175 final Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(new InputSource(in));
176 assertNotNull(doc);
177 assertEquals("X", doc.getFirstChild().getNodeName());
178 }
179
180 private void parseXml(final Reader in) throws SAXException, IOException, ParserConfigurationException {
181 final Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(new InputSource(in));
182 assertNotNull(doc);
183 assertEquals("X", doc.getFirstChild().getNodeName());
184 }
185
186 private void readBOMInputStreamTwice(final String resource) throws Exception {
187 try (InputStream inputStream = this.getClass().getResourceAsStream(resource)) {
188 assertNotNull(inputStream);
189 try (BOMInputStream bomInputStream = BOMInputStream.builder().setInputStream(inputStream).get()) {
190 bomInputStream.mark(1_000_000);
191
192 this.readFile(bomInputStream);
193 bomInputStream.reset();
194 this.readFile(bomInputStream);
195 inputStream.close();
196 }
197 }
198 }
199
200 private void readFile(final BOMInputStream bomInputStream) throws Exception {
201 int bytes;
202 final byte[] bytesFromStream = new byte[100];
203 do {
204 bytes = bomInputStream.read(bytesFromStream);
205 } while (bytes > 0);
206 }
207
208 @Test
209 public void testAvailableWithBOM() throws Exception {
210 final byte[] data = { 'A', 'B', 'C', 'D' };
211 try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
212 assertEquals(7, in.available());
213 }
214 }
215
216 @Test
217 public void testAvailableWithoutBOM() throws Exception {
218 final byte[] data = { 'A', 'B', 'C', 'D' };
219 try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
220 assertEquals(4, in.available());
221 }
222 }
223
224 @Test
225
226 public void testClose() throws Exception {
227 try (ExpectCloseInputStream del = new ExpectCloseInputStream()) {
228 try (InputStream in = new BOMInputStream(del)) {
229
230 }
231 del.assertCloseCalled();
232 }
233 }
234
235 @Test
236 public void testEmptyBufferWithBOM() throws Exception {
237 final byte[] data = {};
238 try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
239 final byte[] buf = new byte[1024];
240 assertEquals(-1, in.read(buf));
241 }
242 }
243
244 @Test
245 public void testEmptyBufferWithoutBOM() throws Exception {
246 final byte[] data = {};
247 try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
248 final byte[] buf = new byte[1024];
249 assertEquals(-1, in.read(buf));
250 }
251 }
252
253 @Test
254 public void testGetBOMFirstThenRead() throws Exception {
255 final byte[] data = { 'A', 'B', 'C' };
256 try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
257 assertEquals(ByteOrderMark.UTF_8, in.getBOM(), "getBOM");
258 assertTrue(in.hasBOM(), "hasBOM()");
259 assertTrue(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
260 assertEquals('A', in.read());
261 assertEquals('B', in.read());
262 assertEquals('C', in.read());
263 assertEquals(-1, in.read());
264 }
265 }
266
267 @Test
268 public void testGetBOMFirstThenReadInclude() throws Exception {
269 final byte[] data = { 'A', 'B', 'C' };
270 try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).setInclude(true).get()) {
271 assertTrue(in.hasBOM(), "hasBOM()");
272 assertTrue(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
273 assertEquals(ByteOrderMark.UTF_8, in.getBOM(), "getBOM");
274 assertEquals(0xEF, in.read());
275 assertEquals(0xBB, in.read());
276 assertEquals(0xBF, in.read());
277 assertEquals('A', in.read());
278 assertEquals('B', in.read());
279 assertEquals('C', in.read());
280 assertEquals(-1, in.read());
281 }
282 }
283
284 @Test
285 public void testLargeBufferWithBOM() throws Exception {
286 final byte[] data = { 'A', 'B', 'C' };
287 try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
288 final byte[] buf = new byte[1024];
289 assertData(data, buf, in.read(buf));
290 }
291 }
292
293 @Test
294 public void testLargeBufferWithoutBOM() throws Exception {
295 final byte[] data = { 'A', 'B', 'C' };
296 try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
297 final byte[] buf = new byte[1024];
298 assertData(data, buf, in.read(buf));
299 }
300 }
301
302 @Test
303 public void testLeadingNonBOMBufferedRead() throws Exception {
304 final byte[] data = { (byte) 0xEF, (byte) 0xAB, (byte) 0xCD };
305 try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
306 final byte[] buf = new byte[1024];
307 assertData(data, buf, in.read(buf));
308 }
309 }
310
311 @Test
312 public void testLeadingNonBOMSingleRead() throws Exception {
313 final byte[] data = { (byte) 0xEF, (byte) 0xAB, (byte) 0xCD };
314 try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
315 assertEquals(0xEF, in.read());
316 assertEquals(0xAB, in.read());
317 assertEquals(0xCD, in.read());
318 assertEquals(-1, in.read());
319 }
320 }
321
322 @Test
323 public void testMarkResetAfterReadWithBOM() throws Exception {
324 final byte[] data = { 'A', 'B', 'C', 'D' };
325 try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
326 assertTrue(in.markSupported());
327
328 in.read();
329 in.mark(10);
330
331 in.read();
332 in.read();
333 in.reset();
334 assertEquals('B', in.read());
335 }
336 }
337
338 @Test
339 public void testMarkResetAfterReadWithoutBOM() throws Exception {
340 final byte[] data = { 'A', 'B', 'C', 'D' };
341 try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
342 assertTrue(in.markSupported());
343
344 in.read();
345 in.mark(10);
346
347 in.read();
348 in.read();
349 in.reset();
350 assertEquals('B', in.read());
351 }
352 }
353
354 @Test
355 public void testMarkResetBeforeReadWithBOM() throws Exception {
356 final byte[] data = { 'A', 'B', 'C', 'D' };
357 try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
358 assertTrue(in.markSupported());
359
360 in.mark(10);
361
362 in.read();
363 in.read();
364 in.reset();
365 assertEquals('A', in.read());
366 }
367 }
368
369 @Test
370 public void testMarkResetBeforeReadWithoutBOM() throws Exception {
371 final byte[] data = { 'A', 'B', 'C', 'D' };
372 try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
373 assertTrue(in.markSupported());
374
375 in.mark(10);
376
377 in.read();
378 in.read();
379 in.reset();
380 assertEquals('A', in.read());
381 }
382 }
383
384 @Test
385 public void testNoBoms() throws Exception {
386 final byte[] data = { 'A', 'B', 'C' };
387 assertThrows(IllegalArgumentException.class, () -> new BOMInputStream(createUtf8Input(data, true), false, (ByteOrderMark[]) null).close());
388 assertThrows(IllegalArgumentException.class, () -> new BOMInputStream(createUtf8Input(data, true), false, new ByteOrderMark[0]).close());
389
390 try (final BOMInputStream bomInputStream = BOMInputStream.builder()
391 .setInputStream(createUtf8Input(data, true))
392 .setInclude(true)
393 .setByteOrderMarks((ByteOrderMark[]) null)
394 .get()) {
395 assertEquals(BOMInputStream.Builder.getDefaultByteOrderMark(), bomInputStream.getBOM());
396 }
397 assertThrows(IllegalArgumentException.class, () -> BOMInputStream.builder()
398 .setInputStream(createUtf8Input(data, true))
399 .setInclude(true)
400 .setByteOrderMarks()
401 .get()
402 .close());
403 }
404
405 @Test
406 public void testReadEmpty() throws Exception {
407 final byte[] data = {};
408 try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
409 assertEquals(-1, in.read());
410 assertFalse(in.hasBOM(), "hasBOM()");
411 assertFalse(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
412 assertNull(in.getBOM(), "getBOM");
413 }
414 }
415
416 @Test
417 public void testReadSmall() throws Exception {
418 final byte[] data = { 'A', 'B' };
419 try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
420 assertEquals('A', in.read());
421 assertEquals('B', in.read());
422 assertEquals(-1, in.read());
423 assertFalse(in.hasBOM(), "hasBOM()");
424 assertFalse(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
425 assertNull(in.getBOM(), "getBOM");
426 }
427 }
428
429 @Test
430 public void testReadTwiceWithBOM() throws Exception {
431 this.readBOMInputStreamTwice("/org/apache/commons/io/testfileBOM.xml");
432 }
433
434 @Test
435 public void testReadTwiceWithoutBOM() throws Exception {
436 this.readBOMInputStreamTwice("/org/apache/commons/io/testfileNoBOM.xml");
437 }
438
439 @Test
440 public void testReadWithBOMInclude() throws Exception {
441 final byte[] data = { 'A', 'B', 'C' };
442 try (BOMInputStream in = new BOMInputStream(createUtf8Input(data, true), true)) {
443 assertEquals(0xEF, in.read());
444 assertEquals(0xBB, in.read());
445 assertEquals(0xBF, in.read());
446 assertEquals('A', in.read());
447 assertEquals('B', in.read());
448 assertEquals('C', in.read());
449 assertEquals(-1, in.read());
450 assertTrue(in.hasBOM(), "hasBOM()");
451 assertTrue(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
452 assertEquals(ByteOrderMark.UTF_8, in.getBOM(), "getBOM");
453 }
454 }
455
456 @Test
457 public void testReadWithBOMUtf16Be() throws Exception {
458 final byte[] data = "ABC".getBytes(StandardCharsets.UTF_16BE);
459 try (BOMInputStream in = new BOMInputStream(createUtf16BeDataStream(data, true), ByteOrderMark.UTF_16BE)) {
460 assertEquals(0, in.read());
461 assertEquals('A', in.read());
462 assertEquals(0, in.read());
463 assertEquals('B', in.read());
464 assertEquals(0, in.read());
465 assertEquals('C', in.read());
466 assertEquals(-1, in.read());
467 assertTrue(in.hasBOM(), "hasBOM()");
468 assertTrue(in.hasBOM(ByteOrderMark.UTF_16BE), "hasBOM(UTF-16BE)");
469 assertEquals(ByteOrderMark.UTF_16BE, in.getBOM(), "getBOM");
470 assertThrows(IllegalArgumentException.class, () -> in.hasBOM(ByteOrderMark.UTF_16LE));
471 }
472 }
473
474 @Test
475 public void testReadWithBOMUtf16Le() throws Exception {
476 final byte[] data = "ABC".getBytes(StandardCharsets.UTF_16LE);
477 try (BOMInputStream in = new BOMInputStream(createUtf16LeDataStream(data, true), ByteOrderMark.UTF_16LE)) {
478 assertEquals('A', in.read());
479 assertEquals(0, in.read());
480 assertEquals('B', in.read());
481 assertEquals(0, in.read());
482 assertEquals('C', in.read());
483 assertEquals(0, in.read());
484 assertEquals(-1, in.read());
485 assertTrue(in.hasBOM(), "hasBOM()");
486 assertTrue(in.hasBOM(ByteOrderMark.UTF_16LE), "hasBOM(UTF-16LE)");
487 assertEquals(ByteOrderMark.UTF_16LE, in.getBOM(), "getBOM");
488 assertThrows(IllegalArgumentException.class, () -> in.hasBOM(ByteOrderMark.UTF_16BE));
489 }
490 }
491
492 @Test
493 public void testReadWithBOMUtf32Be() throws Exception {
494 assumeTrue(Charset.isSupported("UTF_32BE"));
495 final byte[] data = "ABC".getBytes("UTF_32BE");
496 try (BOMInputStream in = new BOMInputStream(createUtf32BeDataStream(data, true),
497 ByteOrderMark.UTF_32BE)) {
498 assertEquals(0, in.read());
499 assertEquals(0, in.read());
500 assertEquals(0, in.read());
501 assertEquals('A', in.read());
502 assertEquals(0, in.read());
503 assertEquals(0, in.read());
504 assertEquals(0, in.read());
505 assertEquals('B', in.read());
506 assertEquals(0, in.read());
507 assertEquals(0, in.read());
508 assertEquals(0, in.read());
509 assertEquals('C', in.read());
510 assertEquals(-1, in.read());
511 assertTrue(in.hasBOM(), "hasBOM()");
512 assertTrue(in.hasBOM(ByteOrderMark.UTF_32BE), "hasBOM(UTF-32BE)");
513 assertEquals(ByteOrderMark.UTF_32BE, in.getBOM(), "getBOM");
514 assertThrows(IllegalArgumentException.class, () -> in.hasBOM(ByteOrderMark.UTF_32LE));
515 }
516 }
517
518 @Test
519 public void testReadWithBOMUtf32Le() throws Exception {
520 assumeTrue(Charset.isSupported("UTF_32LE"));
521 final byte[] data = "ABC".getBytes("UTF_32LE");
522 try (BOMInputStream in = new BOMInputStream(createUtf32LeDataStream(data, true),
523 ByteOrderMark.UTF_32LE)) {
524 assertEquals('A', in.read());
525 assertEquals(0, in.read());
526 assertEquals(0, in.read());
527 assertEquals(0, in.read());
528 assertEquals('B', in.read());
529 assertEquals(0, in.read());
530 assertEquals(0, in.read());
531 assertEquals(0, in.read());
532 assertEquals('C', in.read());
533 assertEquals(0, in.read());
534 assertEquals(0, in.read());
535 assertEquals(0, in.read());
536 assertEquals(-1, in.read());
537 assertTrue(in.hasBOM(), "hasBOM()");
538 assertTrue(in.hasBOM(ByteOrderMark.UTF_32LE), "hasBOM(UTF-32LE)");
539 assertEquals(ByteOrderMark.UTF_32LE, in.getBOM(), "getBOM");
540 assertThrows(IllegalArgumentException.class, () -> in.hasBOM(ByteOrderMark.UTF_32BE));
541 }
542 }
543
544 @Test
545 public void testReadWithBOMUtf8() throws Exception {
546 final byte[] data = "ABC".getBytes(StandardCharsets.UTF_8);
547 try (BOMInputStream in = new BOMInputStream(createUtf8Input(data, true), ByteOrderMark.UTF_8)) {
548 assertEquals('A', in.read());
549 assertEquals('B', in.read());
550 assertEquals('C', in.read());
551 assertEquals(-1, in.read());
552 assertTrue(in.hasBOM(), "hasBOM()");
553 assertTrue(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
554 assertEquals(ByteOrderMark.UTF_8, in.getBOM(), "getBOM");
555 assertThrows(IllegalArgumentException.class, () -> in.hasBOM(ByteOrderMark.UTF_16BE));
556 }
557 }
558
559 @Test
560 public void testReadWithMultipleBOM() throws Exception {
561 final byte[] data = { 'A', 'B', 'C' };
562 try (BOMInputStream in = new BOMInputStream(createUtf8Input(data, true), ByteOrderMark.UTF_16BE,
563 ByteOrderMark.UTF_8)) {
564 assertEquals('A', in.read());
565 assertEquals('B', in.read());
566 assertEquals('C', in.read());
567 assertEquals(-1, in.read());
568 assertTrue(in.hasBOM(), "hasBOM()");
569 assertTrue(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
570 assertFalse(in.hasBOM(ByteOrderMark.UTF_16BE), "hasBOM(UTF-16BE)");
571 assertEquals(ByteOrderMark.UTF_8, in.getBOM(), "getBOM");
572 }
573 }
574
575 @Test
576 public void testReadWithoutBOM() throws Exception {
577 final byte[] data = { 'A', 'B', 'C' };
578 try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
579 assertEquals('A', in.read());
580 assertEquals('B', in.read());
581 assertEquals('C', in.read());
582 assertEquals(-1, in.read());
583 assertFalse(in.hasBOM(), "hasBOM()");
584 assertFalse(in.hasBOM(ByteOrderMark.UTF_8), "hasBOM(UTF-8)");
585 assertNull(in.getBOM(), "getBOM");
586 }
587 }
588
589 @Test
590 public void testReadXmlWithBOMUcs2() throws Exception {
591 assumeFalse(System.getProperty("java.vendor").contains("IBM"), "This test does not pass on some IBM VMs xml parsers");
592
593
594 assumeTrue(Charset.isSupported("ISO-10646-UCS-2"));
595 final byte[] data = "<?xml version=\"1.0\" encoding=\"ISO-10646-UCS-2\"?><X/>".getBytes("ISO-10646-UCS-2");
596 try (BOMInputStream in = new BOMInputStream(createUtf16BeDataStream(data, true), ByteOrderMark.UTF_16BE)) {
597 parseXml(in);
598 }
599 parseXml(createUtf16BeDataStream(data, true));
600 }
601
602 @Test
603 public void testReadXmlWithBOMUcs4() throws Exception {
604
605
606 assumeTrue(Charset.isSupported("ISO-10646-UCS-4"));
607 final byte[] data = "<?xml version=\"1.0\" encoding=\"ISO-10646-UCS-4\"?><X/>".getBytes("ISO-10646-UCS-4");
608
609 try (BOMInputStream in = new BOMInputStream(createUtf32BeDataStream(data, true), ByteOrderMark.UTF_32BE)) {
610 parseXml(in);
611
612 assumeTrue(jvmAndSaxBothSupportCharset("UTF_32LE"), "JVM and SAX need to support UTF_32LE for this");
613 }
614 parseXml(createUtf32BeDataStream(data, true));
615 }
616
617 @Test
618 public void testReadXmlWithBOMUtf16Be() throws Exception {
619 final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-16BE\"?><X/>".getBytes(StandardCharsets.UTF_16BE);
620 try (BOMInputStream in = new BOMInputStream(createUtf16BeDataStream(data, true), ByteOrderMark.UTF_16BE)) {
621 parseXml(in);
622 }
623 parseXml(createUtf16BeDataStream(data, true));
624 }
625
626 @Test
627 public void testReadXmlWithBOMUtf16Le() throws Exception {
628 final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-16LE\"?><X/>".getBytes(StandardCharsets.UTF_16LE);
629 try (BOMInputStream in = new BOMInputStream(createUtf16LeDataStream(data, true), ByteOrderMark.UTF_16LE)) {
630 parseXml(in);
631 }
632 parseXml(createUtf16LeDataStream(data, true));
633 }
634
635 @Test
636 public void testReadXmlWithBOMUtf32Be() throws Exception {
637 assumeTrue(jvmAndSaxBothSupportCharset("UTF_32BE"), "JVM and SAX need to support UTF_32BE for this");
638 final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-32BE\"?><X/>".getBytes("UTF_32BE");
639 try (BOMInputStream in = new BOMInputStream(createUtf32BeDataStream(data, true), ByteOrderMark.UTF_32BE)) {
640 parseXml(in);
641 }
642
643 try (XmlStreamReader in = new XmlStreamReader(createUtf32BeDataStream(data, true))) {
644 parseXml(in);
645 }
646 }
647
648 @Test
649 public void testReadXmlWithBOMUtf32Le() throws Exception {
650 assumeTrue(jvmAndSaxBothSupportCharset("UTF_32LE"), "JVM and SAX need to support UTF_32LE for this");
651 final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-32LE\"?><X/>".getBytes("UTF_32LE");
652 try (BOMInputStream in = new BOMInputStream(createUtf32LeDataStream(data, true), ByteOrderMark.UTF_32LE)) {
653 parseXml(in);
654 }
655
656 try (XmlStreamReader in = new XmlStreamReader(createUtf32LeDataStream(data, true))) {
657 parseXml(in);
658 }
659 }
660
661 @Test
662 public void testReadXmlWithBOMUtf8() throws Exception {
663 final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><X/>".getBytes(StandardCharsets.UTF_8);
664 try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
665 parseXml(in);
666 }
667 parseXml(createUtf8Input(data, true));
668 }
669
670 @Test
671 public void testReadXmlWithoutBOMUtf32Be() throws Exception {
672 assumeTrue(jvmAndSaxBothSupportCharset("UTF_32BE"), "JVM and SAX need to support UTF_32BE for this");
673 final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF_32BE\"?><X/>".getBytes("UTF_32BE");
674 try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
675 parseXml(in);
676 }
677 parseXml(createUtf32BeDataStream(data, false));
678 }
679
680 @Test
681 public void testReadXmlWithoutBOMUtf32Le() throws Exception {
682 assumeTrue(jvmAndSaxBothSupportCharset("UTF_32LE"), "JVM and SAX need to support UTF_32LE for this");
683 final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-32LE\"?><X/>".getBytes("UTF_32LE");
684 try (BOMInputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
685 parseXml(in);
686 }
687 parseXml(createUtf32BeDataStream(data, false));
688 }
689
690
691 @Test
692 public void testSkipReturnValueWithBom() throws IOException {
693 final byte[] data = { (byte) 0x31, (byte) 0x32, (byte) 0x33 };
694 try (BOMInputStream is1 = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
695 assertEquals(2, is1.skip(2));
696 assertEquals((byte) 0x33, is1.read());
697 }
698 }
699
700 @Test
701 public void testSkipReturnValueWithoutBom() throws IOException {
702 final byte[] data = { (byte) 0x31, (byte) 0x32, (byte) 0x33 };
703 try (BOMInputStream is2 = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
704 assertEquals(2, is2.skip(2));
705 assertEquals((byte) 0x33, is2.read());
706 }
707 }
708
709 @Test
710 public void testSkipWithBOM() throws Exception {
711 final byte[] data = { 'A', 'B', 'C', 'D' };
712 try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
713 in.skip(2L);
714 assertEquals('C', in.read());
715 }
716 }
717
718 @Test
719 public void testSkipWithoutBOM() throws Exception {
720 final byte[] data = { 'A', 'B', 'C', 'D' };
721 try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
722 in.skip(2L);
723 assertEquals('C', in.read());
724 }
725 }
726
727 @Test
728 public void testSmallBufferWithBOM() throws Exception {
729 final byte[] data = { 'A', 'B', 'C' };
730 try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, true)).get()) {
731 final byte[] buf = new byte[1024];
732 assertData(new byte[] { 'A', 'B' }, buf, in.read(buf, 0, 2));
733 assertData(new byte[] { 'C' }, buf, in.read(buf, 0, 2));
734 }
735 }
736
737 @Test
738 public void testSmallBufferWithoutBOM() throws Exception {
739 final byte[] data = { 'A', 'B', 'C' };
740 try (InputStream in = BOMInputStream.builder().setInputStream(createUtf8Input(data, false)).get()) {
741 final byte[] buf = new byte[1024];
742 assertData(new byte[] { 'A', 'B' }, buf, in.read(buf, 0, 2));
743 assertData(new byte[] { 'C' }, buf, in.read(buf, 0, 2));
744 }
745 }
746
747 @Test
748
749 public void testSupportCode() throws Exception {
750 try (InputStream in = createUtf8Input(new byte[] { 'A', 'B' }, true)) {
751 final byte[] buf = new byte[1024];
752 final int len = in.read(buf);
753 assertEquals(5, len);
754 assertEquals(0xEF, buf[0] & 0xFF);
755 assertEquals(0xBB, buf[1] & 0xFF);
756 assertEquals(0xBF, buf[2] & 0xFF);
757 assertEquals('A', buf[3] & 0xFF);
758 assertEquals('B', buf[4] & 0xFF);
759
760 assertData(new byte[] { (byte) 0xEF, (byte) 0xBB, (byte) 0xBF, 'A', 'B' }, buf, len);
761 }
762 }
763 }