View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.io.input;
18  
19  import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
20  import static org.junit.jupiter.api.Assertions.assertEquals;
21  import static org.junit.jupiter.api.Assertions.assertNotNull;
22  import static org.junit.jupiter.api.Assertions.assertThrows;
23  import static org.junit.jupiter.api.Assertions.assertTrue;
24  
25  import java.io.CharArrayReader;
26  import java.io.IOException;
27  import java.io.InputStream;
28  import java.io.StringReader;
29  import java.nio.charset.CharacterCodingException;
30  import java.nio.charset.Charset;
31  import java.nio.charset.CharsetEncoder;
32  import java.nio.charset.CodingErrorAction;
33  import java.nio.charset.StandardCharsets;
34  import java.util.Arrays;
35  import java.util.Random;
36  import java.util.concurrent.TimeUnit;
37  import java.util.stream.Stream;
38  
39  import javax.xml.parsers.DocumentBuilderFactory;
40  
41  import org.apache.commons.io.IOUtils;
42  import org.apache.commons.lang3.StringUtils;
43  import org.junit.jupiter.api.Test;
44  import org.junit.jupiter.api.Timeout;
45  import org.junit.jupiter.params.ParameterizedTest;
46  import org.junit.jupiter.params.provider.Arguments;
47  import org.junit.jupiter.params.provider.MethodSource;
48  import org.xml.sax.InputSource;
49  import org.xml.sax.SAXException;
50  
51  class ReaderInputStreamTest {
52  
53      private static final String UTF_16 = StandardCharsets.UTF_16.name();
54      private static final String UTF_8 = StandardCharsets.UTF_8.name();
55      private static final String TEST_STRING = "\u00e0 peine arriv\u00e9s nous entr\u00e2mes dans sa chambre";
56      private static final String LARGE_TEST_STRING = StringUtils.repeat(TEST_STRING, 100);
57  
58      static Stream<Arguments> charsetData() {
59          // @formatter:off
60          return Stream.of(
61                  Arguments.of("Cp930", "\u0391"),
62                  Arguments.of("ISO_8859_1", "A"),
63                  Arguments.of(UTF_8, "\u0391"));
64          // @formatter:on
65      }
66  
67      private final Random random = new Random();
68  
69      private ReaderInputStream createInputStream() throws IOException {
70          // @formatter:off
71          return ReaderInputStream.builder()
72                  .setReader(new StringReader(TEST_STRING))
73                  .setCharset(StandardCharsets.ISO_8859_1)
74                  .get();
75          // @formatter:on
76      }
77  
78      @Test
79      void testAvailableAfterClose() throws IOException {
80          try (InputStream inputStream = createInputStream()) {
81              inputStream.close();
82              assertEquals(0, inputStream.available());
83          }
84      }
85  
86      @Test
87      void testAvailableAfterOpen() throws IOException {
88          try (InputStream inputStream = createInputStream()) {
89              // Nothing read, may block
90              assertEquals(0, inputStream.available());
91              // Read/block
92              inputStream.read();
93              assertEquals(TEST_STRING.length() - 1, inputStream.available());
94          }
95      }
96  
97      @Test
98      @Timeout(value = 500, unit = TimeUnit.MILLISECONDS)
99      void testBufferSmallest() throws IOException {
100         final Charset charset = StandardCharsets.UTF_8;
101         // @formatter:off
102         try (InputStream in = new ReaderInputStream(
103                 new StringReader("\uD800"),
104                 charset, (int)
105                 ReaderInputStream.minBufferSize(charset.newEncoder()))) {
106             in.read();
107         }
108         try (InputStream in = ReaderInputStream.builder()
109                 .setReader(new StringReader("\uD800"))
110                 .setCharset(charset)
111                 .setBufferSize((int) ReaderInputStream.minBufferSize(charset.newEncoder()))
112                 .get()) {
113             in.read();
114         }
115         // @formatter:on
116     }
117 
118     @Test
119     void testBufferTooSmall() {
120         assertThrows(IllegalArgumentException.class, () -> new ReaderInputStream(new StringReader("\uD800"), StandardCharsets.UTF_8, -1));
121         assertThrows(IllegalArgumentException.class, () -> new ReaderInputStream(new StringReader("\uD800"), StandardCharsets.UTF_8, 0));
122         assertThrows(IllegalArgumentException.class, () -> new ReaderInputStream(new StringReader("\uD800"), StandardCharsets.UTF_8, 1));
123     }
124 
125     @ParameterizedTest
126     @MethodSource("charsetData")
127     void testCharsetEncoderFlush(final String charsetName, final String data) throws IOException {
128         final Charset charset = Charset.forName(charsetName);
129         final byte[] expected = data.getBytes(charset);
130         try (InputStream in = new ReaderInputStream(new StringReader(data), charset)) {
131             assertEquals(Arrays.toString(expected), Arrays.toString(IOUtils.toByteArray(in)));
132         }
133         try (InputStream in = ReaderInputStream.builder().setReader(new StringReader(data)).setCharset(charset).get()) {
134             assertEquals(Arrays.toString(expected), Arrays.toString(IOUtils.toByteArray(in)));
135         }
136     }
137 
138     /*
139      * Tests https://issues.apache.org/jira/browse/IO-277
140      */
141     @Test
142     void testCharsetMismatchInfiniteLoop() throws IOException {
143         // Input is UTF-8 bytes: 0xE0 0xB2 0xA0
144         final char[] inputChars = { (char) 0xE0, (char) 0xB2, (char) 0xA0 };
145         // Charset charset = Charset.forName("UTF-8"); // works
146         final Charset charset = StandardCharsets.US_ASCII; // infinite loop
147         try (ReaderInputStream stream = new ReaderInputStream(new CharArrayReader(inputChars), charset)) {
148             IOUtils.toCharArray(stream, charset);
149         }
150     }
151 
152     @Test
153     @Timeout(value = 500, unit = TimeUnit.MILLISECONDS)
154     void testCodingError() throws IOException {
155         // Encoder which throws on malformed or unmappable input
156         CharsetEncoder encoder = StandardCharsets.UTF_8.newEncoder();
157         try (ReaderInputStream in = new ReaderInputStream(new StringReader("\uD800"), encoder)) {
158             // Does not throws an exception because the input is an underflow and not an error
159             assertDoesNotThrow(() -> in.read());
160             // assertThrows(IllegalStateException.class, () -> in.read());
161         }
162         encoder = StandardCharsets.UTF_8.newEncoder();
163         try (ReaderInputStream in = ReaderInputStream.builder().setReader(new StringReader("\uD800")).setCharsetEncoder(encoder).get()) {
164             // TODO WIP
165             assertDoesNotThrow(() -> in.read());
166             // assertThrows(IllegalStateException.class, () -> in.read());
167         }
168     }
169 
170     /**
171      * Tests IO-717 to avoid infinite loops.
172      *
173      * ReaderInputStream does not throw exception with {@link CodingErrorAction#REPORT}.
174      */
175     @Test
176     @Timeout(value = 500, unit = TimeUnit.MILLISECONDS)
177     void testCodingErrorAction() throws IOException {
178         final Charset charset = StandardCharsets.UTF_8;
179         final CharsetEncoder encoder = charset.newEncoder().onMalformedInput(CodingErrorAction.REPORT);
180         try (InputStream in = new ReaderInputStream(new StringReader("\uD800aa"), encoder, (int) ReaderInputStream.minBufferSize(encoder))) {
181             assertThrows(CharacterCodingException.class, in::read);
182         }
183         try (InputStream in = ReaderInputStream.builder().setReader(new StringReader("\uD800aa")).setCharsetEncoder(encoder)
184                 .setBufferSize((int) ReaderInputStream.minBufferSize(charset.newEncoder())).get()) {
185             assertThrows(CharacterCodingException.class, in::read);
186         }
187     }
188 
189     @Test
190     @Timeout(value = 500, unit = TimeUnit.MILLISECONDS)
191     void testConstructNullCharset() throws IOException {
192         final Charset charset = Charset.defaultCharset();
193         final Charset encoder = null;
194         try (ReaderInputStream in = new ReaderInputStream(new StringReader("ABC"), encoder, (int) ReaderInputStream.minBufferSize(charset.newEncoder()))) {
195             IOUtils.toByteArray(in);
196             assertEquals(Charset.defaultCharset(), in.getCharsetEncoder().charset());
197         }
198     }
199 
200     @Test
201     @Timeout(value = 500, unit = TimeUnit.MILLISECONDS)
202     void testConstructNullCharsetEncoder() throws IOException {
203         final Charset charset = Charset.defaultCharset();
204         final CharsetEncoder encoder = null;
205         try (ReaderInputStream in = new ReaderInputStream(new StringReader("ABC"), encoder, (int) ReaderInputStream.minBufferSize(charset.newEncoder()))) {
206             IOUtils.toByteArray(in);
207             assertEquals(Charset.defaultCharset(), in.getCharsetEncoder().charset());
208         }
209     }
210 
211     @Test
212     @Timeout(value = 500, unit = TimeUnit.MILLISECONDS)
213     void testConstructNullCharsetNameEncoder() throws IOException {
214         final Charset charset = Charset.defaultCharset();
215         final String charsetName = null;
216         try (ReaderInputStream in = new ReaderInputStream(new StringReader("ABC"), charsetName, (int) ReaderInputStream.minBufferSize(charset.newEncoder()))) {
217             IOUtils.toByteArray(in);
218             assertEquals(Charset.defaultCharset(), in.getCharsetEncoder().charset());
219         }
220         try (ReaderInputStream in = ReaderInputStream.builder().setReader(new StringReader("ABC")).setCharset(charsetName)
221                 .setBufferSize((int) ReaderInputStream.minBufferSize(charset.newEncoder())).get()) {
222             IOUtils.toByteArray(in);
223             assertEquals(Charset.defaultCharset(), in.getCharsetEncoder().charset());
224         }
225     }
226 
227     @Test
228     void testIo803SAXException() throws IOException {
229         final StringReader reader = new StringReader("");
230         try (ReaderInputStream inputStream = ReaderInputStream.builder().setCharset(StandardCharsets.UTF_8).setReader(reader).get()) {
231             final InputSource inputSource = new InputSource(inputStream);
232             assertThrows(SAXException.class, () -> DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(inputSource));
233         }
234     }
235 
236     @Test
237     void testIo803StringReaderSanityCheck() {
238         final StringReader reader = new StringReader("");
239         final InputSource inputSource = new InputSource(reader);
240         assertThrows(SAXException.class, () -> DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(inputSource));
241     }
242 
243     @Test
244     void testLargeUTF8WithBufferedRead() throws IOException {
245         testWithBufferedRead(LARGE_TEST_STRING, UTF_8);
246     }
247 
248     @Test
249     void testLargeUTF8WithSingleByteRead() throws IOException {
250         testWithSingleByteRead(LARGE_TEST_STRING, UTF_8);
251     }
252 
253     @Test
254     void testReadAfterClose() throws IOException {
255         try (InputStream inputStream = createInputStream()) {
256             inputStream.close();
257             assertThrows(IOException.class, inputStream::read);
258         }
259     }
260 
261     @Test
262     void testReadEofTwice() throws IOException {
263         try (ReaderInputStream reader = ReaderInputStream.builder().setCharset(StandardCharsets.UTF_8).setReader(new StringReader("123")).get()) {
264             assertEquals('1', reader.read());
265             assertEquals('2', reader.read());
266             assertEquals('3', reader.read());
267             assertEquals(-1, reader.read());
268             assertEquals(-1, reader.read());
269         }
270     }
271 
272     @SuppressWarnings("deprecation")
273     @Test
274     void testReadZero() throws Exception {
275         final String inStr = "test";
276         try (ReaderInputStream inputStream = new ReaderInputStream(new StringReader(inStr))) {
277             testReadZero(inStr, inputStream);
278         }
279         try (ReaderInputStream inputStream = ReaderInputStream.builder().setReader(new StringReader(inStr)).get()) {
280             testReadZero(inStr, inputStream);
281         }
282     }
283 
284     private void testReadZero(final String inStr, final ReaderInputStream inputStream) throws IOException {
285         final byte[] bytes = new byte[30];
286         assertEquals(0, inputStream.read(bytes, 0, 0));
287         assertEquals(inStr.length(), inputStream.read(bytes, 0, inStr.length() + 1));
288         // Should always return 0 for length == 0
289         assertEquals(0, inputStream.read(bytes, 0, 0));
290     }
291 
292     @SuppressWarnings("deprecation")
293     @Test
294     void testReadZeroEmptyString() throws Exception {
295         try (ReaderInputStream inputStream = new ReaderInputStream(new StringReader(""))) {
296             final byte[] bytes = new byte[30];
297             // Should always return 0 for length == 0
298             assertEquals(0, inputStream.read(bytes, 0, 0));
299             assertEquals(-1, inputStream.read(bytes, 0, 1));
300             assertEquals(0, inputStream.read(bytes, 0, 0));
301             assertEquals(-1, inputStream.read(bytes, 0, 1));
302         }
303     }
304 
305     @Test
306     void testResetCharset() {
307         assertNotNull(ReaderInputStream.builder().setReader(new StringReader("\uD800")).setCharset((Charset) null).getCharset());
308     }
309 
310     @Test
311     void testResetCharsetEncoder() {
312         assertNotNull(ReaderInputStream.builder().setReader(new StringReader("\uD800")).setCharsetEncoder(null).getCharsetEncoder());
313     }
314 
315     @Test
316     void testResetCharsetName() {
317         assertNotNull(ReaderInputStream.builder().setReader(new StringReader("\uD800")).setCharset((String) null).getCharset());
318     }
319 
320     @Test
321     void testUTF16WithSingleByteRead() throws IOException {
322         testWithSingleByteRead(TEST_STRING, UTF_16);
323     }
324 
325     @Test
326     void testUTF8WithBufferedRead() throws IOException {
327         testWithBufferedRead(TEST_STRING, UTF_8);
328     }
329 
330     @Test
331     void testUTF8WithSingleByteRead() throws IOException {
332         testWithSingleByteRead(TEST_STRING, UTF_8);
333     }
334 
335     private void testWithBufferedRead(final byte[] expected, final ReaderInputStream in) throws IOException {
336         final byte[] buffer = new byte[128];
337         int offset = 0;
338         while (true) {
339             int bufferOffset = random.nextInt(64);
340             final int bufferLength = random.nextInt(64);
341             int read = in.read(buffer, bufferOffset, bufferLength);
342             if (read == -1) {
343                 assertEquals(offset, expected.length);
344                 break;
345             }
346             assertTrue(read <= bufferLength);
347             while (read > 0) {
348                 assertTrue(offset < expected.length);
349                 assertEquals(expected[offset], buffer[bufferOffset]);
350                 offset++;
351                 bufferOffset++;
352                 read--;
353             }
354         }
355     }
356 
357     private void testWithBufferedRead(final String testString, final String charsetName) throws IOException {
358         final byte[] expected = testString.getBytes(charsetName);
359         try (ReaderInputStream in = new ReaderInputStream(new StringReader(testString), charsetName)) {
360             testWithBufferedRead(expected, in);
361         }
362         try (ReaderInputStream in = ReaderInputStream.builder().setReader(new StringReader(testString)).setCharset(charsetName).get()) {
363             testWithBufferedRead(expected, in);
364         }
365     }
366 
367     private void testWithSingleByteRead(final String testString, final String charsetName) throws IOException {
368         final byte[] bytes = testString.getBytes(charsetName);
369         try (ReaderInputStream in = new ReaderInputStream(new StringReader(testString), charsetName)) {
370             for (final byte b : bytes) {
371                 final int read = in.read();
372                 assertTrue(read >= 0);
373                 assertTrue(read <= 255);
374                 assertEquals(b, (byte) read);
375             }
376             assertEquals(-1, in.read());
377         }
378     }
379 }