1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.commons.io.input;
18
19 import static org.junit.jupiter.api.Assertions.assertArrayEquals;
20 import static org.junit.jupiter.api.Assertions.assertEquals;
21 import static org.junit.jupiter.api.Assertions.assertNotEquals;
22 import static org.junit.jupiter.api.Assertions.assertNotNull;
23 import static org.junit.jupiter.api.Assertions.assertThrows;
24 import static org.junit.jupiter.api.Assertions.assertTrue;
25 import static org.junit.jupiter.api.Assertions.fail;
26
27 import java.io.IOException;
28 import java.io.InputStream;
29 import java.io.StringReader;
30 import java.nio.ByteBuffer;
31 import java.nio.CharBuffer;
32 import java.nio.charset.Charset;
33 import java.nio.charset.CharsetEncoder;
34 import java.nio.charset.CoderResult;
35 import java.nio.charset.CodingErrorAction;
36 import java.nio.charset.StandardCharsets;
37 import java.nio.charset.UnmappableCharacterException;
38 import java.util.Random;
39
40 import org.apache.commons.io.CharsetsTest;
41 import org.apache.commons.io.IOUtils;
42 import org.junit.jupiter.api.Test;
43 import org.junit.jupiter.params.ParameterizedTest;
44 import org.junit.jupiter.params.provider.MethodSource;
45
46 public class CharSequenceInputStreamTest {
47
48 private static final String UTF_16 = StandardCharsets.UTF_16.name();
49 private static final String UTF_8 = StandardCharsets.UTF_8.name();
50 private static final String ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
51 private static final String LARGE_TEST_STRING;
52
53 private static final String TEST_STRING = "\u00e0 peine arriv\u00e9s nous entr\u00e2mes dans sa chambre";
54
55 static {
56 final StringBuilder buffer = new StringBuilder();
57 for (int i = 0; i < 100; i++) {
58 buffer.append(TEST_STRING);
59 }
60 LARGE_TEST_STRING = buffer.toString();
61 }
62
63 private final Random random = new Random();
64
65 private int checkAvail(final InputStream is, final int min) throws Exception {
66 final int available = is.available();
67 assertTrue(available >= min, "avail should be >= " + min + ", but was " + available);
68 return available;
69 }
70
71 private boolean isAvailabilityTestableForCharset(final String csName) {
72 return Charset.forName(csName).canEncode()
73 && !"COMPOUND_TEXT".equalsIgnoreCase(csName) && !"x-COMPOUND_TEXT".equalsIgnoreCase(csName)
74 && !isOddBallLegacyCharsetThatDoesNotSupportFrenchCharacters(csName);
75 }
76
77 private boolean isOddBallLegacyCharsetThatDoesNotSupportFrenchCharacters(final String csName) {
78 return "x-IBM1388".equalsIgnoreCase(csName) ||
79 "ISO-2022-CN".equalsIgnoreCase(csName) ||
80 "ISO-2022-JP".equalsIgnoreCase(csName) ||
81 "Shift_JIS".equalsIgnoreCase(csName);
82 }
83
84 @ParameterizedTest(name = "{0}")
85 @MethodSource(CharsetsTest.AVAIL_CHARSETS)
86 public void testAvailable(final String csName) throws Exception {
87
88
89
90
91
92
93
94
95
96
97
98 try {
99 if (isAvailabilityTestableForCharset(csName)) {
100 testAvailableSkip(csName);
101 testAvailableRead(csName);
102 }
103 } catch (final UnsupportedOperationException e) {
104 fail("Operation not supported for " + csName);
105 }
106 }
107
108 @Test
109 public void testAvailableAfterClose() throws Exception {
110 final InputStream shadow;
111 try (InputStream in = CharSequenceInputStream.builder().setCharSequence("Hi").get()) {
112 assertTrue(in.available() > 0);
113 shadow = in;
114 }
115 assertEquals(0, shadow.available());
116 }
117
118
119
120
121 @Test
122 public void testAvailableAfterOpen() throws IOException {
123 final Charset charset = Charset.forName("Big5");
124 try (CharSequenceInputStream in = new CharSequenceInputStream("\uD800\uDC00", charset)) {
125 final int available = in.available();
126 final byte[] data = new byte[available];
127 final int bytesRead = in.read(data);
128 assertEquals(available, bytesRead);
129 }
130 }
131
132 private void testAvailableRead(final String csName) throws Exception {
133 final String input = "test";
134 try (InputStream r = new CharSequenceInputStream(input, csName)) {
135 int available = checkAvail(r, input.length());
136 assertEquals(available - 1, r.skip(available - 1));
137 available = checkAvail(r, 1);
138 final byte[] buff = new byte[available];
139 assertEquals(available, r.read(buff, 0, available));
140 }
141 }
142
143 private void testAvailableSkip(final String csName) throws Exception {
144 final String input = "test";
145 try (InputStream r = new CharSequenceInputStream(input, csName)) {
146 int available = checkAvail(r, input.length());
147 assertEquals(available - 1, r.skip(available - 1));
148 available = checkAvail(r, 1);
149 assertEquals(1, r.skip(1));
150 available = checkAvail(r, 0);
151 }
152 }
153
154 private void testBufferedRead(final String testString, final String charsetName) throws IOException {
155 final byte[] expected = testString.getBytes(charsetName);
156 try (InputStream in = new CharSequenceInputStream(testString, charsetName, 512)) {
157 final byte[] buffer = new byte[128];
158 int offset = 0; while (true) {
159 int bufferOffset = random.nextInt(64);
160 final int bufferLength = random.nextInt(64);
161 int read = in.read(buffer, bufferOffset, bufferLength);
162 if (read == -1) {
163 assertEquals(expected.length, offset, "EOF: offset should equal length for charset " + charsetName);
164 break;
165 }
166 assertTrue(read <= bufferLength, "Read " + read + " <= " + bufferLength);
167 while (read > 0) {
168 assertTrue(offset < expected.length,
169 "offset for " + charsetName + " " + offset + " < " + expected.length);
170 assertEquals(expected[offset], buffer[bufferOffset], "bytes should agree for " + charsetName);
171 offset++;
172 bufferOffset++;
173 read--;
174 }
175 }
176 }
177 }
178
179
180
181
182
183
184
185
186 @ParameterizedTest(name = "{0}")
187 @MethodSource(CharsetsTest.AVAIL_CHARSETS)
188 public void testBufferedRead_AvailableCharset(final String csName) throws IOException {
189
190 if (isAvailabilityTestableForCharset(csName)) {
191 testBufferedRead(TEST_STRING, csName);
192 }
193 }
194
195 @ParameterizedTest
196 @MethodSource(CharsetsTest.REQUIRED_CHARSETS)
197 public void testBufferedRead_RequiredCharset(final String csName) throws IOException {
198 testBufferedRead(TEST_STRING, csName);
199 }
200
201 @Test
202 public void testBufferedRead_UTF8() throws IOException {
203 testBufferedRead(TEST_STRING, UTF_8);
204 }
205
206 @Test
207 public void testCharacterCodingException() throws IOException {
208 final Charset charset = StandardCharsets.US_ASCII;
209 final CharSequenceInputStream in = CharSequenceInputStream.builder()
210 .setCharsetEncoder(charset.newEncoder().onUnmappableCharacter(CodingErrorAction.REPORT))
211 .setCharSequence("\u0080")
212 .get();
213 assertEquals(0, in.available());
214 assertThrows(UnmappableCharacterException.class, in::read);
215 }
216
217 private void testCharsetMismatchInfiniteLoop(final String csName) throws IOException {
218
219 final char[] inputChars = { (char) 0xE0, (char) 0xB2, (char) 0xA0 };
220 final Charset charset = Charset.forName(csName);
221 try (InputStream stream = new CharSequenceInputStream(new String(inputChars), charset, 512)) {
222 IOUtils.toCharArray(stream, charset);
223 }
224 try (InputStream stream = CharSequenceInputStream.builder().setCharSequence(new String(inputChars)).setCharset(charset).setBufferSize(512).get()) {
225 IOUtils.toCharArray(stream, charset);
226 }
227 }
228
229 @ParameterizedTest
230 @MethodSource(CharsetsTest.REQUIRED_CHARSETS)
231 public void testCharsetMismatchInfiniteLoop_RequiredCharsets(final String csName) throws IOException {
232 testCharsetMismatchInfiniteLoop(csName);
233 }
234
235
236
237
238 private void testIO_356(final int bufferSize, final int dataSize, final int readFirst, final String csName) throws Exception {
239 final byte[] data1;
240 final byte[] data2;
241 try (CharSequenceInputStream is = new CharSequenceInputStream(ALPHABET, csName, bufferSize)) {
242 for (int i = 0; i < readFirst; i++) {
243 final int ch = is.read();
244 assertNotEquals(-1, ch);
245 }
246
247 is.mark(dataSize);
248
249 data1 = new byte[dataSize];
250 final int readCount1 = is.read(data1);
251 assertEquals(dataSize, readCount1);
252
253 is.reset();
254
255 data2 = new byte[dataSize];
256 final int readCount2 = is.read(data2);
257 assertEquals(dataSize, readCount2);
258 }
259
260
261 assertArrayEquals(data1, data2, "bufferSize=" + bufferSize + " dataSize=" + dataSize);
262 }
263
264 @Test
265 public void testIO_356_B10_D10_S0_UTF16() throws Exception {
266 testIO_356(10, 10, 0, UTF_16);
267 }
268
269 @Test
270 public void testIO_356_B10_D10_S0_UTF8() throws Exception {
271 testIO_356(10, 10, 0, UTF_8);
272 }
273
274 @Test
275 public void testIO_356_B10_D10_S1_UTF8() throws Exception {
276 testIO_356(10, 10, 1, UTF_8);
277 }
278
279 @Test
280 public void testIO_356_B10_D10_S2_UTF8() throws Exception {
281 testIO_356(10, 10, 2, UTF_8);
282 }
283
284 @Test
285 public void testIO_356_B10_D13_S0_UTF8() throws Exception {
286 testIO_356(10, 13, 0, UTF_8);
287 }
288
289 @Test
290 public void testIO_356_B10_D13_S1_UTF8() throws Exception {
291 testIO_356(10, 13, 1, UTF_8);
292 }
293
294 @Test
295 public void testIO_356_B10_D20_S0_UTF8() throws Exception {
296 testIO_356(10, 20, 0, UTF_8);
297 }
298
299 private void testIO_356_Loop(final String csName, final int maxBytesPerChar) throws Exception {
300 for (int bufferSize = maxBytesPerChar; bufferSize <= 10; bufferSize++) {
301 for (int dataSize = 1; dataSize <= 20; dataSize++) {
302 testIO_356(bufferSize, dataSize, 0, csName);
303 }
304 }
305 }
306
307 @Test
308 public void testIO_356_Loop_UTF16() throws Exception {
309 final Charset charset = StandardCharsets.UTF_16;
310 testIO_356_Loop(charset.displayName(), (int) ReaderInputStream.minBufferSize(charset.newEncoder()));
311 }
312
313 @Test
314 public void testIO_356_Loop_UTF8() throws Exception {
315 final Charset charset = StandardCharsets.UTF_8;
316 testIO_356_Loop(charset.displayName(), (int) ReaderInputStream.minBufferSize(charset.newEncoder()));
317 }
318
319 @ParameterizedTest
320 @MethodSource(CharsetsTest.REQUIRED_CHARSETS)
321 public void testLargeBufferedRead_RequiredCharsets(final String csName) throws IOException {
322 testBufferedRead(LARGE_TEST_STRING, csName);
323 }
324
325 @Test
326 public void testLargeBufferedRead_UTF8() throws IOException {
327 testBufferedRead(LARGE_TEST_STRING, UTF_8);
328 }
329
330 @ParameterizedTest
331 @MethodSource(CharsetsTest.REQUIRED_CHARSETS)
332 public void testLargeSingleByteRead_RequiredCharsets(final String csName) throws IOException {
333 testSingleByteRead(LARGE_TEST_STRING, csName);
334 }
335
336 @Test
337 public void testLargeSingleByteRead_UTF8() throws IOException {
338 testSingleByteRead(LARGE_TEST_STRING, UTF_8);
339 }
340
341
342
343 private void testMarkReset(final String csName) throws Exception {
344 try (InputStream r = new CharSequenceInputStream("test", csName)) {
345 assertEquals(2, r.skip(2));
346 r.mark(0);
347 assertEquals('s', r.read(), csName);
348 assertEquals('t', r.read(), csName);
349 assertEquals(-1, r.read(), csName);
350 r.reset();
351 assertEquals('s', r.read(), csName);
352 assertEquals('t', r.read(), csName);
353 assertEquals(-1, r.read(), csName);
354 r.reset();
355 r.reset();
356 }
357 }
358
359 @ParameterizedTest
360 @MethodSource(CharsetsTest.REQUIRED_CHARSETS)
361 public void testMarkReset_RequiredCharsets(final String csName) throws Exception {
362 testMarkResetMultiByteChars(csName);
363 }
364
365 @Test
366 public void testMarkReset_USASCII() throws Exception {
367 testMarkReset(StandardCharsets.US_ASCII.name());
368 }
369
370 @Test
371 public void testMarkReset_UTF8() throws Exception {
372 testMarkReset(UTF_8);
373 }
374
375 private void testMarkResetMultiByteChars(final String csName) throws IOException {
376
377 final String sequenceEnglish = "Test Sequence";
378 final String sequenceCJK = "\u4e01\u4f23\u5045\u5167\u5289\u53ab";
379 final String[] sequences = {sequenceEnglish, sequenceCJK};
380 for (final String testSequence : sequences) {
381 final CharsetEncoder charsetEncoder = Charset.forName(csName).newEncoder();
382 final ByteBuffer byteBuffer = ByteBuffer.allocate(testSequence.length() * 3);
383 final CharBuffer charBuffer = CharBuffer.wrap(testSequence);
384 final CoderResult result = charsetEncoder.encode(charBuffer, byteBuffer, true);
385 if (result.isUnmappable()) {
386 continue;
387 }
388 final byte[] expectedBytes = byteBuffer.array();
389
390 final int bLength = byteBuffer.position();
391 final int skip = bLength - 4;
392 try (InputStream r = new CharSequenceInputStream(testSequence, csName)) {
393 assertEquals(skip, r.skip(skip));
394 r.mark(0);
395 assertEquals(expectedBytes[bLength - 4], (byte) r.read(), csName);
396 assertEquals(expectedBytes[bLength - 3], (byte) r.read(), csName);
397 assertEquals(expectedBytes[bLength - 2], (byte) r.read(), csName);
398 assertEquals(expectedBytes[bLength - 1], (byte) r.read(), csName);
399 assertEquals(-1, (byte) r.read(), csName);
400 r.reset();
401 assertEquals(expectedBytes[bLength - 4], (byte) r.read(), csName);
402 assertEquals(expectedBytes[bLength - 3], (byte) r.read(), csName);
403 assertEquals(expectedBytes[bLength - 2], (byte) r.read(), csName);
404 assertEquals(expectedBytes[bLength - 1], (byte) r.read(), csName);
405 assertEquals(-1, (byte) r.read(), csName);
406 r.reset();
407 assertEquals(expectedBytes[bLength - 4], (byte) r.read(), csName);
408 assertEquals(expectedBytes[bLength - 3], (byte) r.read(), csName);
409 assertEquals(expectedBytes[bLength - 2], (byte) r.read(), csName);
410 assertEquals(expectedBytes[bLength - 1], (byte) r.read(), csName);
411 assertEquals(-1, (byte) r.read(), csName);
412 }
413 }
414 }
415
416 @Test
417 public void testMarkSupported() throws Exception {
418 try (@SuppressWarnings("deprecation")
419 InputStream r = new CharSequenceInputStream("test", UTF_8)) {
420 assertTrue(r.markSupported());
421 }
422 try (InputStream r = CharSequenceInputStream.builder().setCharSequence("test").setCharset(UTF_8).get()) {
423 assertTrue(r.markSupported());
424 }
425 }
426
427 @Test
428 public void testNullCharset() throws IOException {
429 try (CharSequenceInputStream in = new CharSequenceInputStream("A", (Charset) null)) {
430 IOUtils.toByteArray(in);
431 assertEquals(Charset.defaultCharset(), in.getCharsetEncoder().charset());
432 }
433 try (CharSequenceInputStream in = CharSequenceInputStream.builder().setCharSequence("test").setCharset((Charset) null).get()) {
434 IOUtils.toByteArray(in);
435 assertEquals(Charset.defaultCharset(), in.getCharsetEncoder().charset());
436 }
437 }
438
439 @Test
440 public void testNullCharsetName() throws IOException {
441 try (CharSequenceInputStream in = new CharSequenceInputStream("A", (String) null)) {
442 IOUtils.toByteArray(in);
443 assertEquals(Charset.defaultCharset(), in.getCharsetEncoder().charset());
444 }
445 try (CharSequenceInputStream in = CharSequenceInputStream.builder().setCharSequence("test").setCharset((String) null).get()) {
446 IOUtils.toByteArray(in);
447 assertEquals(Charset.defaultCharset(), in.getCharsetEncoder().charset());
448 }
449 }
450
451 @Test
452 public void testReadAfterClose() throws Exception {
453 final InputStream shadow;
454 try (InputStream in = CharSequenceInputStream.builder().setCharSequence("Hi").get()) {
455 assertTrue(in.available() > 0);
456 shadow = in;
457 }
458 assertEquals(IOUtils.EOF, shadow.read());
459 }
460
461 private void testReadZero(final String csName) throws Exception {
462 try (InputStream r = new CharSequenceInputStream("test", csName)) {
463 final byte[] bytes = new byte[30];
464 assertEquals(0, r.read(bytes, 0, 0));
465 }
466 }
467
468 @Test
469 public void testReadZero_EmptyString() throws Exception {
470 try (InputStream r = new CharSequenceInputStream("", UTF_8)) {
471 final byte[] bytes = new byte[30];
472 assertEquals(0, r.read(bytes, 0, 0));
473 }
474 }
475
476 @ParameterizedTest
477 @MethodSource(CharsetsTest.REQUIRED_CHARSETS)
478 public void testReadZero_RequiredCharsets(final String csName) throws Exception {
479 testReadZero(csName);
480 }
481
482 private void testResetBeforeEnd(final CharSequenceInputStream inputStream) throws IOException {
483 inputStream.mark(1);
484 assertEquals('1', inputStream.read());
485 inputStream.reset();
486 assertEquals('1', inputStream.read());
487 assertEquals('2', inputStream.read());
488 inputStream.reset();
489 assertEquals('1', inputStream.read());
490 assertEquals('2', inputStream.read());
491 assertEquals('3', inputStream.read());
492 inputStream.reset();
493 assertEquals('1', inputStream.read());
494 assertEquals('2', inputStream.read());
495 assertEquals('3', inputStream.read());
496 assertEquals('4', inputStream.read());
497 inputStream.reset();
498 assertEquals('1', inputStream.read());
499 }
500
501 @Test
502 public void testResetBeforeEndSetCharSequence() throws IOException {
503 try (CharSequenceInputStream inputStream = CharSequenceInputStream.builder().setCharSequence("1234").get()) {
504 testResetBeforeEnd(inputStream);
505 }
506 }
507
508 @Test
509 public void testResetCharset() {
510 assertNotNull(CharSequenceInputStream.builder().setReader(new StringReader("\uD800")).setCharset((Charset) null).getCharset());
511 }
512
513 @Test
514 public void testResetCharsetEncoder() {
515 assertNotNull(CharSequenceInputStream.builder().setReader(new StringReader("\uD800")).setCharsetEncoder(null).getCharsetEncoder());
516 }
517
518 @Test
519 public void testResetCharsetName() {
520 assertNotNull(CharSequenceInputStream.builder().setReader(new StringReader("\uD800")).setCharset((String) null).getCharset());
521 }
522
523 private void testSingleByteRead(final String testString, final String charsetName) throws IOException {
524 final byte[] bytes = testString.getBytes(charsetName);
525 try (InputStream in = new CharSequenceInputStream(testString, charsetName, 512)) {
526 for (final byte b : bytes) {
527 final int read = in.read();
528 assertTrue(read >= 0, "read " + read + " >=0 ");
529 assertTrue(read <= 255, "read " + read + " <= 255");
530 assertEquals(b, (byte) read, "Should agree with input");
531 }
532 assertEquals(-1, in.read());
533 }
534 }
535
536 @ParameterizedTest
537 @MethodSource(CharsetsTest.REQUIRED_CHARSETS)
538 public void testSingleByteRead_RequiredCharsets(final String csName) throws IOException {
539 testSingleByteRead(TEST_STRING, csName);
540 }
541
542 @Test
543 public void testSingleByteRead_UTF16() throws IOException {
544 testSingleByteRead(TEST_STRING, UTF_16);
545 }
546
547 @Test
548 public void testSingleByteRead_UTF8() throws IOException {
549 testSingleByteRead(TEST_STRING, UTF_8);
550 }
551
552 @ParameterizedTest
553 @MethodSource(CharsetsTest.REQUIRED_CHARSETS)
554 public void testSkip_RequiredCharsets(final String csName) throws Exception {
555 try (InputStream r = new CharSequenceInputStream("test", csName)) {
556 assertEquals(1, r.skip(1));
557 assertEquals(2, r.skip(2));
558 r.skip(100);
559 assertEquals(-1, r.read(), csName);
560 }
561 }
562 }