1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.commons.io.input;
18
19 import static org.junit.jupiter.api.Assertions.assertArrayEquals;
20 import static org.junit.jupiter.api.Assertions.assertEquals;
21 import static org.junit.jupiter.api.Assertions.assertNotEquals;
22 import static org.junit.jupiter.api.Assertions.assertNotNull;
23 import static org.junit.jupiter.api.Assertions.assertThrows;
24 import static org.junit.jupiter.api.Assertions.assertTrue;
25 import static org.junit.jupiter.api.Assertions.fail;
26
27 import java.io.IOException;
28 import java.io.InputStream;
29 import java.io.StringReader;
30 import java.nio.ByteBuffer;
31 import java.nio.CharBuffer;
32 import java.nio.charset.Charset;
33 import java.nio.charset.CharsetEncoder;
34 import java.nio.charset.CoderResult;
35 import java.nio.charset.CodingErrorAction;
36 import java.nio.charset.StandardCharsets;
37 import java.nio.charset.UnmappableCharacterException;
38 import java.util.Random;
39
40 import org.apache.commons.io.CharsetsTest;
41 import org.apache.commons.io.IOUtils;
42 import org.apache.commons.lang3.StringUtils;
43 import org.junit.jupiter.api.Test;
44 import org.junit.jupiter.params.ParameterizedTest;
45 import org.junit.jupiter.params.provider.MethodSource;
46
47 class CharSequenceInputStreamTest {
48
/** Canonical name of {@link StandardCharsets#UTF_16}, for APIs that take a charset name. */
private static final String UTF_16 = StandardCharsets.UTF_16.name();
/** Canonical name of {@link StandardCharsets#UTF_8}, for APIs that take a charset name. */
private static final String UTF_8 = StandardCharsets.UTF_8.name();
/** The 26 upper-case ASCII letters; the input text of the {@code testIO_356} mark/reset checks. */
private static final String ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
/** Short French sample containing accented, non-ASCII letters (written as Unicode escapes). */
private static final String TEST_STRING = "\u00e0 peine arriv\u00e9s nous entr\u00e2mes dans sa chambre";
/** {@link #TEST_STRING} repeated 100 times. */
private static final String LARGE_TEST_STRING = StringUtils.repeat(TEST_STRING, 100);

/** Unseeded random source; {@code testBufferedRead} draws its read offsets and lengths from it. */
private final Random random = new Random();
56
57 private int checkAvail(final InputStream is, final int min) throws Exception {
58 final int available = is.available();
59 assertTrue(available >= min, "avail should be >= " + min + ", but was " + available);
60 return available;
61 }
62
63 private boolean isAvailabilityTestableForCharset(final String csName) {
64 return Charset.forName(csName).canEncode()
65 && !"COMPOUND_TEXT".equalsIgnoreCase(csName) && !"x-COMPOUND_TEXT".equalsIgnoreCase(csName)
66 && !isOddBallLegacyCharsetThatDoesNotSupportFrenchCharacters(csName);
67 }
68
69 private boolean isOddBallLegacyCharsetThatDoesNotSupportFrenchCharacters(final String csName) {
70 return "x-IBM1388".equalsIgnoreCase(csName) ||
71 "ISO-2022-CN".equalsIgnoreCase(csName) ||
72 "ISO-2022-JP".equalsIgnoreCase(csName) ||
73 "Shift_JIS".equalsIgnoreCase(csName);
74 }
75
76 @ParameterizedTest(name = "{0}")
77 @MethodSource(CharsetsTest.AVAIL_CHARSETS)
78 void testAvailable(final String csName) throws Exception {
79
80
81
82
83
84
85
86
87
88
89
90 try {
91 if (isAvailabilityTestableForCharset(csName)) {
92 testAvailableSkip(csName);
93 testAvailableRead(csName);
94 }
95 } catch (final UnsupportedOperationException e) {
96 fail("Operation not supported for " + csName);
97 }
98 }
99
100 @Test
101 void testAvailableAfterClose() throws Exception {
102 final InputStream shadow;
103 try (InputStream in = CharSequenceInputStream.builder().setCharSequence("Hi").get()) {
104 assertTrue(in.available() > 0);
105 shadow = in;
106 }
107 assertEquals(0, shadow.available());
108 }
109
110
111
112
113 @Test
114 void testAvailableAfterOpen() throws IOException {
115 final Charset charset = Charset.forName("Big5");
116 try (CharSequenceInputStream in = new CharSequenceInputStream("\uD800\uDC00", charset)) {
117 final int available = in.available();
118 final byte[] data = new byte[available];
119 final int bytesRead = in.read(data);
120 assertEquals(available, bytesRead);
121 }
122 }
123
124 private void testAvailableRead(final String csName) throws Exception {
125 final String input = "test";
126 try (InputStream r = new CharSequenceInputStream(input, csName)) {
127 int available = checkAvail(r, input.length());
128 assertEquals(available - 1, r.skip(available - 1));
129 available = checkAvail(r, 1);
130 final byte[] buff = new byte[available];
131 assertEquals(available, r.read(buff, 0, available));
132 }
133 }
134
135 private void testAvailableSkip(final String csName) throws Exception {
136 final String input = "test";
137 try (InputStream r = new CharSequenceInputStream(input, csName)) {
138 int available = checkAvail(r, input.length());
139 assertEquals(available - 1, r.skip(available - 1));
140 available = checkAvail(r, 1);
141 assertEquals(1, r.skip(1));
142 available = checkAvail(r, 0);
143 }
144 }
145
146 private void testBufferedRead(final String testString, final String charsetName) throws IOException {
147 final byte[] expected = testString.getBytes(charsetName);
148 try (InputStream in = new CharSequenceInputStream(testString, charsetName, 512)) {
149 final byte[] buffer = new byte[128];
150 int offset = 0; while (true) {
151 int bufferOffset = random.nextInt(64);
152 final int bufferLength = random.nextInt(64);
153 int read = in.read(buffer, bufferOffset, bufferLength);
154 if (read == -1) {
155 assertEquals(expected.length, offset, "EOF: offset should equal length for charset " + charsetName);
156 break;
157 }
158 assertTrue(read <= bufferLength, "Read " + read + " <= " + bufferLength);
159 while (read > 0) {
160 assertTrue(offset < expected.length,
161 "offset for " + charsetName + " " + offset + " < " + expected.length);
162 assertEquals(expected[offset], buffer[bufferOffset], "bytes should agree for " + charsetName);
163 offset++;
164 bufferOffset++;
165 read--;
166 }
167 }
168 }
169 }
170
171
172
173
174
175
176
177
178 @ParameterizedTest(name = "{0}")
179 @MethodSource(CharsetsTest.AVAIL_CHARSETS)
180 void testBufferedRead_AvailableCharset(final String csName) throws IOException {
181
182 if (isAvailabilityTestableForCharset(csName)) {
183 testBufferedRead(TEST_STRING, csName);
184 }
185 }
186
187 @ParameterizedTest
188 @MethodSource(CharsetsTest.REQUIRED_CHARSETS)
189 void testBufferedRead_RequiredCharset(final String csName) throws IOException {
190 testBufferedRead(TEST_STRING, csName);
191 }
192
193 @Test
194 void testBufferedRead_UTF8() throws IOException {
195 testBufferedRead(TEST_STRING, UTF_8);
196 }
197
198 @Test
199 void testCharacterCodingException() throws IOException {
200 final Charset charset = StandardCharsets.US_ASCII;
201 final CharSequenceInputStream in = CharSequenceInputStream.builder()
202 .setCharsetEncoder(charset.newEncoder().onUnmappableCharacter(CodingErrorAction.REPORT))
203 .setCharSequence("\u0080")
204 .get();
205 assertEquals(0, in.available());
206 assertThrows(UnmappableCharacterException.class, in::read);
207 }
208
209 private void testCharsetMismatchInfiniteLoop(final String csName) throws IOException {
210
211 final char[] inputChars = { (char) 0xE0, (char) 0xB2, (char) 0xA0 };
212 final Charset charset = Charset.forName(csName);
213 try (InputStream stream = new CharSequenceInputStream(new String(inputChars), charset, 512)) {
214 IOUtils.toCharArray(stream, charset);
215 }
216 try (InputStream stream = CharSequenceInputStream.builder().setCharSequence(new String(inputChars)).setCharset(charset).setBufferSize(512).get()) {
217 IOUtils.toCharArray(stream, charset);
218 }
219 }
220
221 @ParameterizedTest
222 @MethodSource(CharsetsTest.REQUIRED_CHARSETS)
223 void testCharsetMismatchInfiniteLoop_RequiredCharsets(final String csName) throws IOException {
224 testCharsetMismatchInfiniteLoop(csName);
225 }
226
227
228
229
230 private void testIO_356(final int bufferSize, final int dataSize, final int readFirst, final String csName) throws Exception {
231 final byte[] data1;
232 final byte[] data2;
233 try (CharSequenceInputStream is = new CharSequenceInputStream(ALPHABET, csName, bufferSize)) {
234 for (int i = 0; i < readFirst; i++) {
235 final int ch = is.read();
236 assertNotEquals(-1, ch);
237 }
238
239 is.mark(dataSize);
240
241 data1 = new byte[dataSize];
242 final int readCount1 = is.read(data1);
243 assertEquals(dataSize, readCount1);
244
245 is.reset();
246
247 data2 = new byte[dataSize];
248 final int readCount2 = is.read(data2);
249 assertEquals(dataSize, readCount2);
250 }
251
252
253 assertArrayEquals(data1, data2, "bufferSize=" + bufferSize + " dataSize=" + dataSize);
254 }
255
256 @Test
257 void testIO_356_B10_D10_S0_UTF16() throws Exception {
258 testIO_356(10, 10, 0, UTF_16);
259 }
260
261 @Test
262 void testIO_356_B10_D10_S0_UTF8() throws Exception {
263 testIO_356(10, 10, 0, UTF_8);
264 }
265
266 @Test
267 void testIO_356_B10_D10_S1_UTF8() throws Exception {
268 testIO_356(10, 10, 1, UTF_8);
269 }
270
271 @Test
272 void testIO_356_B10_D10_S2_UTF8() throws Exception {
273 testIO_356(10, 10, 2, UTF_8);
274 }
275
276 @Test
277 void testIO_356_B10_D13_S0_UTF8() throws Exception {
278 testIO_356(10, 13, 0, UTF_8);
279 }
280
281 @Test
282 void testIO_356_B10_D13_S1_UTF8() throws Exception {
283 testIO_356(10, 13, 1, UTF_8);
284 }
285
286 @Test
287 void testIO_356_B10_D20_S0_UTF8() throws Exception {
288 testIO_356(10, 20, 0, UTF_8);
289 }
290
291 private void testIO_356_Loop(final String csName, final int maxBytesPerChar) throws Exception {
292 for (int bufferSize = maxBytesPerChar; bufferSize <= 10; bufferSize++) {
293 for (int dataSize = 1; dataSize <= 20; dataSize++) {
294 testIO_356(bufferSize, dataSize, 0, csName);
295 }
296 }
297 }
298
299 @Test
300 void testIO_356_Loop_UTF16() throws Exception {
301 final Charset charset = StandardCharsets.UTF_16;
302 testIO_356_Loop(charset.displayName(), (int) ReaderInputStream.minBufferSize(charset.newEncoder()));
303 }
304
305 @Test
306 void testIO_356_Loop_UTF8() throws Exception {
307 final Charset charset = StandardCharsets.UTF_8;
308 testIO_356_Loop(charset.displayName(), (int) ReaderInputStream.minBufferSize(charset.newEncoder()));
309 }
310
311 @ParameterizedTest
312 @MethodSource(CharsetsTest.REQUIRED_CHARSETS)
313 void testLargeBufferedRead_RequiredCharsets(final String csName) throws IOException {
314 testBufferedRead(LARGE_TEST_STRING, csName);
315 }
316
317 @Test
318 void testLargeBufferedRead_UTF8() throws IOException {
319 testBufferedRead(LARGE_TEST_STRING, UTF_8);
320 }
321
322 @ParameterizedTest
323 @MethodSource(CharsetsTest.REQUIRED_CHARSETS)
324 void testLargeSingleByteRead_RequiredCharsets(final String csName) throws IOException {
325 testSingleByteRead(LARGE_TEST_STRING, csName);
326 }
327
328 @Test
329 void testLargeSingleByteRead_UTF8() throws IOException {
330 testSingleByteRead(LARGE_TEST_STRING, UTF_8);
331 }
332
333
334
335 private void testMarkReset(final String csName) throws Exception {
336 try (InputStream r = new CharSequenceInputStream("test", csName)) {
337 assertEquals(2, r.skip(2));
338 r.mark(0);
339 assertEquals('s', r.read(), csName);
340 assertEquals('t', r.read(), csName);
341 assertEquals(-1, r.read(), csName);
342 r.reset();
343 assertEquals('s', r.read(), csName);
344 assertEquals('t', r.read(), csName);
345 assertEquals(-1, r.read(), csName);
346 r.reset();
347 r.reset();
348 }
349 }
350
351 @ParameterizedTest
352 @MethodSource(CharsetsTest.REQUIRED_CHARSETS)
353 void testMarkReset_RequiredCharsets(final String csName) throws Exception {
354 testMarkResetMultiByteChars(csName);
355 }
356
357 @Test
358 void testMarkReset_USASCII() throws Exception {
359 testMarkReset(StandardCharsets.US_ASCII.name());
360 }
361
362 @Test
363 void testMarkReset_UTF8() throws Exception {
364 testMarkReset(UTF_8);
365 }
366
367 private void testMarkResetMultiByteChars(final String csName) throws IOException {
368
369 final String sequenceEnglish = "Test Sequence";
370 final String sequenceCJK = "\u4e01\u4f23\u5045\u5167\u5289\u53ab";
371 final String[] sequences = {sequenceEnglish, sequenceCJK};
372 for (final String testSequence : sequences) {
373 final CharsetEncoder charsetEncoder = Charset.forName(csName).newEncoder();
374 final ByteBuffer byteBuffer = ByteBuffer.allocate(testSequence.length() * 3);
375 final CharBuffer charBuffer = CharBuffer.wrap(testSequence);
376 final CoderResult result = charsetEncoder.encode(charBuffer, byteBuffer, true);
377 if (result.isUnmappable()) {
378 continue;
379 }
380 final byte[] expectedBytes = byteBuffer.array();
381
382 final int bLength = byteBuffer.position();
383 final int skip = bLength - 4;
384 try (InputStream r = new CharSequenceInputStream(testSequence, csName)) {
385 assertEquals(skip, r.skip(skip));
386 r.mark(0);
387 assertEquals(expectedBytes[bLength - 4], (byte) r.read(), csName);
388 assertEquals(expectedBytes[bLength - 3], (byte) r.read(), csName);
389 assertEquals(expectedBytes[bLength - 2], (byte) r.read(), csName);
390 assertEquals(expectedBytes[bLength - 1], (byte) r.read(), csName);
391 assertEquals(-1, (byte) r.read(), csName);
392 r.reset();
393 assertEquals(expectedBytes[bLength - 4], (byte) r.read(), csName);
394 assertEquals(expectedBytes[bLength - 3], (byte) r.read(), csName);
395 assertEquals(expectedBytes[bLength - 2], (byte) r.read(), csName);
396 assertEquals(expectedBytes[bLength - 1], (byte) r.read(), csName);
397 assertEquals(-1, (byte) r.read(), csName);
398 r.reset();
399 assertEquals(expectedBytes[bLength - 4], (byte) r.read(), csName);
400 assertEquals(expectedBytes[bLength - 3], (byte) r.read(), csName);
401 assertEquals(expectedBytes[bLength - 2], (byte) r.read(), csName);
402 assertEquals(expectedBytes[bLength - 1], (byte) r.read(), csName);
403 assertEquals(-1, (byte) r.read(), csName);
404 }
405 }
406 }
407
408 @Test
409 void testMarkSupported() throws Exception {
410 try (@SuppressWarnings("deprecation")
411 InputStream r = new CharSequenceInputStream("test", UTF_8)) {
412 assertTrue(r.markSupported());
413 }
414 try (InputStream r = CharSequenceInputStream.builder().setCharSequence("test").setCharset(UTF_8).get()) {
415 assertTrue(r.markSupported());
416 }
417 }
418
419 @Test
420 void testNullCharset() throws IOException {
421 try (CharSequenceInputStream in = new CharSequenceInputStream("A", (Charset) null)) {
422 IOUtils.toByteArray(in);
423 assertEquals(Charset.defaultCharset(), in.getCharsetEncoder().charset());
424 }
425 try (CharSequenceInputStream in = CharSequenceInputStream.builder().setCharSequence("test").setCharset((Charset) null).get()) {
426 IOUtils.toByteArray(in);
427 assertEquals(Charset.defaultCharset(), in.getCharsetEncoder().charset());
428 }
429 }
430
431 @Test
432 void testNullCharsetName() throws IOException {
433 try (CharSequenceInputStream in = new CharSequenceInputStream("A", (String) null)) {
434 IOUtils.toByteArray(in);
435 assertEquals(Charset.defaultCharset(), in.getCharsetEncoder().charset());
436 }
437 try (CharSequenceInputStream in = CharSequenceInputStream.builder().setCharSequence("test").setCharset((String) null).get()) {
438 IOUtils.toByteArray(in);
439 assertEquals(Charset.defaultCharset(), in.getCharsetEncoder().charset());
440 }
441 }
442
443 @Test
444 void testReadAfterClose() throws Exception {
445 final InputStream shadow;
446 try (InputStream in = CharSequenceInputStream.builder().setCharSequence("Hi").get()) {
447 assertTrue(in.available() > 0);
448 shadow = in;
449 }
450 assertEquals(IOUtils.EOF, shadow.read());
451 }
452
453 private void testReadZero(final String csName) throws Exception {
454 try (InputStream r = new CharSequenceInputStream("test", csName)) {
455 final byte[] bytes = new byte[30];
456 assertEquals(0, r.read(bytes, 0, 0));
457 }
458 }
459
460 @Test
461 void testReadZero_EmptyString() throws Exception {
462 try (InputStream r = new CharSequenceInputStream("", UTF_8)) {
463 final byte[] bytes = new byte[30];
464 assertEquals(0, r.read(bytes, 0, 0));
465 }
466 }
467
468 @ParameterizedTest
469 @MethodSource(CharsetsTest.REQUIRED_CHARSETS)
470 void testReadZero_RequiredCharsets(final String csName) throws Exception {
471 testReadZero(csName);
472 }
473
474 private void testResetBeforeEnd(final CharSequenceInputStream inputStream) throws IOException {
475 inputStream.mark(1);
476 assertEquals('1', inputStream.read());
477 inputStream.reset();
478 assertEquals('1', inputStream.read());
479 assertEquals('2', inputStream.read());
480 inputStream.reset();
481 assertEquals('1', inputStream.read());
482 assertEquals('2', inputStream.read());
483 assertEquals('3', inputStream.read());
484 inputStream.reset();
485 assertEquals('1', inputStream.read());
486 assertEquals('2', inputStream.read());
487 assertEquals('3', inputStream.read());
488 assertEquals('4', inputStream.read());
489 inputStream.reset();
490 assertEquals('1', inputStream.read());
491 }
492
493 @Test
494 void testResetBeforeEndSetCharSequence() throws IOException {
495 try (CharSequenceInputStream inputStream = CharSequenceInputStream.builder().setCharSequence("1234").get()) {
496 testResetBeforeEnd(inputStream);
497 }
498 }
499
500 @Test
501 void testResetCharset() {
502 assertNotNull(CharSequenceInputStream.builder().setReader(new StringReader("\uD800")).setCharset((Charset) null).getCharset());
503 }
504
505 @Test
506 void testResetCharsetEncoder() {
507 assertNotNull(CharSequenceInputStream.builder().setReader(new StringReader("\uD800")).setCharsetEncoder(null).getCharsetEncoder());
508 }
509
510 @Test
511 void testResetCharsetName() {
512 assertNotNull(CharSequenceInputStream.builder().setReader(new StringReader("\uD800")).setCharset((String) null).getCharset());
513 }
514
515 private void testSingleByteRead(final String testString, final String charsetName) throws IOException {
516 final byte[] bytes = testString.getBytes(charsetName);
517 try (InputStream in = new CharSequenceInputStream(testString, charsetName, 512)) {
518 for (final byte b : bytes) {
519 final int read = in.read();
520 assertTrue(read >= 0, "read " + read + " >=0 ");
521 assertTrue(read <= 255, "read " + read + " <= 255");
522 assertEquals(b, (byte) read, "Should agree with input");
523 }
524 assertEquals(-1, in.read());
525 }
526 }
527
528 @ParameterizedTest
529 @MethodSource(CharsetsTest.REQUIRED_CHARSETS)
530 void testSingleByteRead_RequiredCharsets(final String csName) throws IOException {
531 testSingleByteRead(TEST_STRING, csName);
532 }
533
534 @Test
535 void testSingleByteRead_UTF16() throws IOException {
536 testSingleByteRead(TEST_STRING, UTF_16);
537 }
538
539 @Test
540 void testSingleByteRead_UTF8() throws IOException {
541 testSingleByteRead(TEST_STRING, UTF_8);
542 }
543
544 @ParameterizedTest
545 @MethodSource(CharsetsTest.REQUIRED_CHARSETS)
546 void testSkip_RequiredCharsets(final String csName) throws Exception {
547 try (InputStream r = new CharSequenceInputStream("test", csName)) {
548 assertEquals(1, r.skip(1));
549 assertEquals(2, r.skip(2));
550 r.skip(100);
551 assertEquals(-1, r.read(), csName);
552 }
553 }
554 }