/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17  package org.apache.commons.io.input;
18  
19  import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
20  import static org.junit.jupiter.api.Assertions.assertEquals;
21  import static org.junit.jupiter.api.Assertions.assertNotNull;
22  import static org.junit.jupiter.api.Assertions.assertThrows;
23  import static org.junit.jupiter.api.Assertions.assertTrue;
24  
25  import java.io.CharArrayReader;
26  import java.io.IOException;
27  import java.io.InputStream;
28  import java.io.StringReader;
29  import java.nio.charset.CharacterCodingException;
30  import java.nio.charset.Charset;
31  import java.nio.charset.CharsetEncoder;
32  import java.nio.charset.CodingErrorAction;
33  import java.nio.charset.StandardCharsets;
34  import java.util.Arrays;
35  import java.util.Random;
36  import java.util.concurrent.TimeUnit;
37  import java.util.stream.Stream;
38  
39  import javax.xml.parsers.DocumentBuilderFactory;
40  
41  import org.apache.commons.io.IOUtils;
42  import org.junit.jupiter.api.Test;
43  import org.junit.jupiter.api.Timeout;
44  import org.junit.jupiter.params.ParameterizedTest;
45  import org.junit.jupiter.params.provider.Arguments;
46  import org.junit.jupiter.params.provider.MethodSource;
47  import org.xml.sax.InputSource;
48  import org.xml.sax.SAXException;
49  
50  public class ReaderInputStreamTest {
51  
52      private static final String UTF_16 = StandardCharsets.UTF_16.name();
53      private static final String UTF_8 = StandardCharsets.UTF_8.name();
54      private static final String TEST_STRING = "\u00e0 peine arriv\u00e9s nous entr\u00e2mes dans sa chambre";
55      private static final String LARGE_TEST_STRING;
56  
57      static {
58          final StringBuilder buffer = new StringBuilder();
59          for (int i = 0; i < 100; i++) {
60              buffer.append(TEST_STRING);
61          }
62          LARGE_TEST_STRING = buffer.toString();
63      }
64  
65      static Stream<Arguments> charsetData() {
66          // @formatter:off
67          return Stream.of(
68                  Arguments.of("Cp930", "\u0391"),
69                  Arguments.of("ISO_8859_1", "A"),
70                  Arguments.of(UTF_8, "\u0391"));
71          // @formatter:on
72      }
73  
74      private final Random random = new Random();
75  
76      @Test
77      @Timeout(value = 500, unit = TimeUnit.MILLISECONDS)
78      public void testBufferSmallest() throws IOException {
79          final Charset charset = StandardCharsets.UTF_8;
80          // @formatter:off
81          try (InputStream in = new ReaderInputStream(
82                  new StringReader("\uD800"),
83                  charset, (int)
84                  ReaderInputStream.minBufferSize(charset.newEncoder()))) {
85              in.read();
86          }
87          try (InputStream in = ReaderInputStream.builder()
88                  .setReader(new StringReader("\uD800"))
89                  .setCharset(charset)
90                  .setBufferSize((int) ReaderInputStream.minBufferSize(charset.newEncoder()))
91                  .get()) {
92              in.read();
93          }
94          // @formatter:on
95      }
96  
97      @Test
98      public void testBufferTooSmall() {
99          assertThrows(IllegalArgumentException.class, () -> new ReaderInputStream(new StringReader("\uD800"), StandardCharsets.UTF_8, -1));
100         assertThrows(IllegalArgumentException.class, () -> new ReaderInputStream(new StringReader("\uD800"), StandardCharsets.UTF_8, 0));
101         assertThrows(IllegalArgumentException.class, () -> new ReaderInputStream(new StringReader("\uD800"), StandardCharsets.UTF_8, 1));
102     }
103 
104     @ParameterizedTest
105     @MethodSource("charsetData")
106     public void testCharsetEncoderFlush(final String charsetName, final String data) throws IOException {
107         final Charset charset = Charset.forName(charsetName);
108         final byte[] expected = data.getBytes(charset);
109         try (InputStream in = new ReaderInputStream(new StringReader(data), charset)) {
110             assertEquals(Arrays.toString(expected), Arrays.toString(IOUtils.toByteArray(in)));
111         }
112         try (InputStream in = ReaderInputStream.builder().setReader(new StringReader(data)).setCharset(charset).get()) {
113             assertEquals(Arrays.toString(expected), Arrays.toString(IOUtils.toByteArray(in)));
114         }
115     }
116 
117     /*
118      * Tests https://issues.apache.org/jira/browse/IO-277
119      */
120     @Test
121     public void testCharsetMismatchInfiniteLoop() throws IOException {
122         // Input is UTF-8 bytes: 0xE0 0xB2 0xA0
123         final char[] inputChars = { (char) 0xE0, (char) 0xB2, (char) 0xA0 };
124         // Charset charset = Charset.forName("UTF-8"); // works
125         final Charset charset = StandardCharsets.US_ASCII; // infinite loop
126         try (ReaderInputStream stream = new ReaderInputStream(new CharArrayReader(inputChars), charset)) {
127             IOUtils.toCharArray(stream, charset);
128         }
129     }
130 
131     @Test
132     @Timeout(value = 500, unit = TimeUnit.MILLISECONDS)
133     public void testCodingError() throws IOException {
134         // Encoder which throws on malformed or unmappable input
135         CharsetEncoder encoder = StandardCharsets.UTF_8.newEncoder();
136         try (final ReaderInputStream in = new ReaderInputStream(new StringReader("\uD800"), encoder)) {
137             // Does not throws an exception because the input is an underflow and not an error
138             assertDoesNotThrow(() -> in.read());
139             // assertThrows(IllegalStateException.class, () -> in.read());
140         }
141         encoder = StandardCharsets.UTF_8.newEncoder();
142         try (final ReaderInputStream in = ReaderInputStream.builder().setReader(new StringReader("\uD800")).setCharsetEncoder(encoder).get()) {
143             // TODO WIP
144             assertDoesNotThrow(() -> in.read());
145             // assertThrows(IllegalStateException.class, () -> in.read());
146         }
147     }
148 
149     /**
150      * Tests IO-717 to avoid infinite loops.
151      *
152      * ReaderInputStream does not throw exception with {@link CodingErrorAction#REPORT}.
153      */
154     @Test
155     @Timeout(value = 500, unit = TimeUnit.MILLISECONDS)
156     public void testCodingErrorAction() throws IOException {
157         final Charset charset = StandardCharsets.UTF_8;
158         final CharsetEncoder encoder = charset.newEncoder().onMalformedInput(CodingErrorAction.REPORT);
159         try (InputStream in = new ReaderInputStream(new StringReader("\uD800aa"), encoder, (int) ReaderInputStream.minBufferSize(encoder))) {
160             assertThrows(CharacterCodingException.class, in::read);
161         }
162         try (InputStream in = ReaderInputStream.builder().setReader(new StringReader("\uD800aa")).setCharsetEncoder(encoder)
163                 .setBufferSize((int) ReaderInputStream.minBufferSize(charset.newEncoder())).get()) {
164             assertThrows(CharacterCodingException.class, in::read);
165         }
166     }
167 
168     @Test
169     @Timeout(value = 500, unit = TimeUnit.MILLISECONDS)
170     public void testConstructNullCharset() throws IOException {
171         final Charset charset = Charset.defaultCharset();
172         final Charset encoder = null;
173         try (ReaderInputStream in = new ReaderInputStream(new StringReader("ABC"), encoder, (int) ReaderInputStream.minBufferSize(charset.newEncoder()))) {
174             IOUtils.toByteArray(in);
175             assertEquals(Charset.defaultCharset(), in.getCharsetEncoder().charset());
176         }
177     }
178 
179     @Test
180     @Timeout(value = 500, unit = TimeUnit.MILLISECONDS)
181     public void testConstructNullCharsetEncoder() throws IOException {
182         final Charset charset = Charset.defaultCharset();
183         final CharsetEncoder encoder = null;
184         try (ReaderInputStream in = new ReaderInputStream(new StringReader("ABC"), encoder, (int) ReaderInputStream.minBufferSize(charset.newEncoder()))) {
185             IOUtils.toByteArray(in);
186             assertEquals(Charset.defaultCharset(), in.getCharsetEncoder().charset());
187         }
188     }
189 
190     @Test
191     @Timeout(value = 500, unit = TimeUnit.MILLISECONDS)
192     public void testConstructNullCharsetNameEncoder() throws IOException {
193         final Charset charset = Charset.defaultCharset();
194         final String charsetName = null;
195         try (ReaderInputStream in = new ReaderInputStream(new StringReader("ABC"), charsetName, (int) ReaderInputStream.minBufferSize(charset.newEncoder()))) {
196             IOUtils.toByteArray(in);
197             assertEquals(Charset.defaultCharset(), in.getCharsetEncoder().charset());
198         }
199         try (ReaderInputStream in = ReaderInputStream.builder().setReader(new StringReader("ABC")).setCharset(charsetName)
200                 .setBufferSize((int) ReaderInputStream.minBufferSize(charset.newEncoder())).get()) {
201             IOUtils.toByteArray(in);
202             assertEquals(Charset.defaultCharset(), in.getCharsetEncoder().charset());
203         }
204     }
205 
206     @Test
207     public void testIo803SAXException() throws IOException {
208         final StringReader reader = new StringReader("");
209         try (final ReaderInputStream inputStream = ReaderInputStream.builder().setCharset(StandardCharsets.UTF_8).setReader(reader).get()) {
210             final InputSource inputSource = new InputSource(inputStream);
211             assertThrows(SAXException.class, () -> DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(inputSource));
212         }
213     }
214 
215     @Test
216     public void testIo803StringReaderSanityCheck() {
217         final StringReader reader = new StringReader("");
218         final InputSource inputSource = new InputSource(reader);
219         assertThrows(SAXException.class, () -> DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(inputSource));
220     }
221 
222     @Test
223     public void testLargeUTF8WithBufferedRead() throws IOException {
224         testWithBufferedRead(LARGE_TEST_STRING, UTF_8);
225     }
226 
227     @Test
228     public void testLargeUTF8WithSingleByteRead() throws IOException {
229         testWithSingleByteRead(LARGE_TEST_STRING, UTF_8);
230     }
231 
232     @Test
233     public void testReadEofTwice() throws IOException {
234         try (ReaderInputStream reader = ReaderInputStream.builder().setCharset(StandardCharsets.UTF_8).setReader(new StringReader("123")).get()) {
235             assertEquals('1', reader.read());
236             assertEquals('2', reader.read());
237             assertEquals('3', reader.read());
238             assertEquals(-1, reader.read());
239             assertEquals(-1, reader.read());
240         }
241     }
242 
243     @SuppressWarnings("deprecation")
244     @Test
245     public void testReadZero() throws Exception {
246         final String inStr = "test";
247         try (ReaderInputStream inputStream = new ReaderInputStream(new StringReader(inStr))) {
248             testReadZero(inStr, inputStream);
249         }
250         try (ReaderInputStream inputStream = ReaderInputStream.builder().setReader(new StringReader(inStr)).get()) {
251             testReadZero(inStr, inputStream);
252         }
253     }
254 
255     private void testReadZero(final String inStr, final ReaderInputStream inputStream) throws IOException {
256         final byte[] bytes = new byte[30];
257         assertEquals(0, inputStream.read(bytes, 0, 0));
258         assertEquals(inStr.length(), inputStream.read(bytes, 0, inStr.length() + 1));
259         // Should always return 0 for length == 0
260         assertEquals(0, inputStream.read(bytes, 0, 0));
261     }
262 
263     @SuppressWarnings("deprecation")
264     @Test
265     public void testReadZeroEmptyString() throws Exception {
266         try (ReaderInputStream inputStream = new ReaderInputStream(new StringReader(""))) {
267             final byte[] bytes = new byte[30];
268             // Should always return 0 for length == 0
269             assertEquals(0, inputStream.read(bytes, 0, 0));
270             assertEquals(-1, inputStream.read(bytes, 0, 1));
271             assertEquals(0, inputStream.read(bytes, 0, 0));
272             assertEquals(-1, inputStream.read(bytes, 0, 1));
273         }
274     }
275 
276     @Test
277     public void testResetCharset() {
278         assertNotNull(ReaderInputStream.builder().setReader(new StringReader("\uD800")).setCharset((Charset) null).getCharset());
279     }
280 
281     @Test
282     public void testResetCharsetEncoder() {
283         assertNotNull(ReaderInputStream.builder().setReader(new StringReader("\uD800")).setCharsetEncoder(null).getCharsetEncoder());
284     }
285 
286     @Test
287     public void testResetCharsetName() {
288         assertNotNull(ReaderInputStream.builder().setReader(new StringReader("\uD800")).setCharset((String) null).getCharset());
289     }
290 
291     @Test
292     public void testUTF16WithSingleByteRead() throws IOException {
293         testWithSingleByteRead(TEST_STRING, UTF_16);
294     }
295 
296     @Test
297     public void testUTF8WithBufferedRead() throws IOException {
298         testWithBufferedRead(TEST_STRING, UTF_8);
299     }
300 
301     @Test
302     public void testUTF8WithSingleByteRead() throws IOException {
303         testWithSingleByteRead(TEST_STRING, UTF_8);
304     }
305 
306     private void testWithBufferedRead(final byte[] expected, final ReaderInputStream in) throws IOException {
307         final byte[] buffer = new byte[128];
308         int offset = 0;
309         while (true) {
310             int bufferOffset = random.nextInt(64);
311             final int bufferLength = random.nextInt(64);
312             int read = in.read(buffer, bufferOffset, bufferLength);
313             if (read == -1) {
314                 assertEquals(offset, expected.length);
315                 break;
316             }
317             assertTrue(read <= bufferLength);
318             while (read > 0) {
319                 assertTrue(offset < expected.length);
320                 assertEquals(expected[offset], buffer[bufferOffset]);
321                 offset++;
322                 bufferOffset++;
323                 read--;
324             }
325         }
326     }
327 
328     private void testWithBufferedRead(final String testString, final String charsetName) throws IOException {
329         final byte[] expected = testString.getBytes(charsetName);
330         try (ReaderInputStream in = new ReaderInputStream(new StringReader(testString), charsetName)) {
331             testWithBufferedRead(expected, in);
332         }
333         try (ReaderInputStream in = ReaderInputStream.builder().setReader(new StringReader(testString)).setCharset(charsetName).get()) {
334             testWithBufferedRead(expected, in);
335         }
336     }
337 
338     private void testWithSingleByteRead(final String testString, final String charsetName) throws IOException {
339         final byte[] bytes = testString.getBytes(charsetName);
340         try (ReaderInputStream in = new ReaderInputStream(new StringReader(testString), charsetName)) {
341             for (final byte b : bytes) {
342                 final int read = in.read();
343                 assertTrue(read >= 0);
344                 assertTrue(read <= 255);
345                 assertEquals(b, (byte) read);
346             }
347             assertEquals(-1, in.read());
348         }
349     }
350 }