View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.io.input;
18  
19  import static org.junit.Assert.assertArrayEquals;
20  import static org.junit.Assert.assertEquals;
21  import static org.junit.Assert.assertTrue;
22  import static org.junit.Assert.assertFalse;
23  
24  import java.io.IOException;
25  import java.io.InputStream;
26  import java.nio.charset.Charset;
27  import java.util.Random;
28  import java.util.Set;
29  
30  import org.apache.commons.io.Charsets;
31  import org.junit.Ignore;
32  import org.junit.Test;
33  
34  public class CharSequenceInputStreamTest {
35  
36      private static final String ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
37      private static final String LARGE_TEST_STRING;
38  
39      private static final String TEST_STRING = "\u00e0 peine arriv\u00e9s nous entr\u00e2mes dans sa chambre";
40  
41      static {
42          final StringBuilder buffer = new StringBuilder();
43          for (int i = 0; i < 100; i++) {
44              buffer.append(TEST_STRING);
45          }
46          LARGE_TEST_STRING = buffer.toString();
47      }
48  
49      private final Random random = new Random();
50  
51      private Set<String> getRequiredCharsetNames() {
52          return Charsets.requiredCharsets().keySet();
53      }
54  
55      private void testBufferedRead(final String testString, final String charsetName) throws IOException {
56          final byte[] expected = testString.getBytes(charsetName);
57          final InputStream in = new CharSequenceInputStream(testString, charsetName, 512);
58          try {
59              final byte[] buffer = new byte[128];
60              int offset = 0;
61              while (true) {
62                  int bufferOffset = random.nextInt(64);
63                  final int bufferLength = random.nextInt(64);
64                  int read = in.read(buffer, bufferOffset, bufferLength);
65                  if (read == -1) {
66                      assertEquals("EOF: offset should equal length", expected.length, offset);
67                      break;
68                  } else {
69                      assertTrue("Read " + read + " <= " + bufferLength, read <= bufferLength);
70                      while (read > 0) {
71                          assertTrue("offset " + offset + " < " + expected.length, offset < expected.length);
72                          assertEquals("bytes should agree", expected[offset], buffer[bufferOffset]);
73                          offset++;
74                          bufferOffset++;
75                          read--;
76                      }
77                  }
78              }
79          } finally {
80              in.close();
81          }
82      }
83  
84  //    Unfortunately checking canEncode does not seem to work for all charsets:
85  //    testBufferedRead_AvailableCharset(org.apache.commons.io.input.CharSequenceInputStreamTest)  Time elapsed: 0.682 sec  <<< ERROR!
86  //    java.lang.UnsupportedOperationException: null
87  //        at java.nio.CharBuffer.array(CharBuffer.java:940)
88  //        at sun.nio.cs.ext.COMPOUND_TEXT_Encoder.encodeLoop(COMPOUND_TEXT_Encoder.java:75)
89  //        at java.nio.charset.CharsetEncoder.encode(CharsetEncoder.java:544)
90  //        at org.apache.commons.io.input.CharSequenceInputStream.fillBuffer(CharSequenceInputStream.java:111)
91      @Test
92      public void testBufferedRead_AvailableCharset() throws IOException {
93          for (final String csName : Charset.availableCharsets().keySet()) {
94              // prevent java.lang.UnsupportedOperationException at sun.nio.cs.ext.ISO2022_CN.newEncoder.
95              if (isAvailabilityTestableForCharset(csName)) {
96                  testBufferedRead(TEST_STRING, csName);
97              }
98          }
99      }
100 
101     @Test
102     public void testBufferedRead_RequiredCharset() throws IOException {
103         for (final String csName : getRequiredCharsetNames()) {
104             testBufferedRead(TEST_STRING, csName);
105         }
106     }
107 
108     @Test
109     public void testBufferedRead_UTF8() throws IOException {
110         testBufferedRead(TEST_STRING, "UTF-8");
111     }
112 
113     private void testCharsetMismatchInfiniteLoop(final String csName) throws IOException {
114         // Input is UTF-8 bytes: 0xE0 0xB2 0xA0
115         final char[] inputChars = new char[] { (char) 0xE0, (char) 0xB2, (char) 0xA0 };
116         final Charset charset = Charset.forName(csName); // infinite loop for US-ASCII, UTF-8 OK
117         final InputStream stream = new CharSequenceInputStream(new String(inputChars), charset, 512);
118         try {
119             while (stream.read() != -1) {
120             }
121         } finally {
122             stream.close();
123         }
124     }
125 
126     @Test
127     public void testCharsetMismatchInfiniteLoop_RequiredCharsets() throws IOException {
128         for (final String csName : getRequiredCharsetNames()) {
129             testCharsetMismatchInfiniteLoop(csName);
130         }
131     }
132 
133     // Test is broken if readFirst > 0
134     // This is because the initial read fills the buffer from the CharSequence
135     // so data1 gets the first buffer full; data2 will get the next buffer full
136     private void testIO_356(final int bufferSize, final int dataSize, final int readFirst, final String csName) throws Exception {
137         final CharSequenceInputStream is = new CharSequenceInputStream(ALPHABET, csName, bufferSize);
138 
139         for (int i = 0; i < readFirst; i++) {
140             final int ch = is.read();
141             assertFalse(ch == -1);
142         }
143 
144         is.mark(dataSize);
145 
146         final byte[] data1 = new byte[dataSize];
147         final int readCount1 = is.read(data1);
148         assertEquals(dataSize, readCount1);
149 
150         is.reset(); // should allow data to be re-read
151 
152         final byte[] data2 = new byte[dataSize];
153         final int readCount2 = is.read(data2);
154         assertEquals(dataSize, readCount2);
155 
156         is.close();
157 
158         // data buffers should be identical
159         assertArrayEquals("bufferSize=" + bufferSize + " dataSize=" + dataSize, data1, data2);
160     }
161 
162     @Test
163     public void testIO_356_B10_D10_S0_UTF16() throws Exception {
164         testIO_356(10, 10, 0, "UTF-16");
165     }
166 
167     @Test
168     public void testIO_356_B10_D10_S0_UTF8() throws Exception {
169         testIO_356(10, 10, 0, "UTF-8");
170     }
171 
172     @Test
173     public void testIO_356_B10_D10_S1_UTF8() throws Exception {
174         testIO_356(10, 10, 1, "UTF-8");
175     }
176 
177     @Test
178     public void testIO_356_B10_D10_S2_UTF8() throws Exception {
179         testIO_356(10, 10, 2, "UTF-8");
180     }
181 
182     @Test
183     public void testIO_356_B10_D13_S0_UTF8() throws Exception {
184         testIO_356(10, 13, 0, "UTF-8");
185     }
186 
187     @Test
188     public void testIO_356_B10_D13_S1_UTF8() throws Exception {
189         testIO_356(10, 13, 1, "UTF-8");
190     }
191 
192     @Test
193     public void testIO_356_B10_D20_S0_UTF8() throws Exception {
194         testIO_356(10, 20, 0, "UTF-8");
195     }
196 
197     private void testIO_356_Loop(final String csName, final int maxBytesPerChar) throws Exception {
198         for (int bufferSize = maxBytesPerChar; bufferSize <= 10; bufferSize++) {
199             for (int dataSize = 1; dataSize <= 20; dataSize++) {
200                 testIO_356(bufferSize, dataSize, 0, csName);
201             }
202         }
203     }
204 
205     @Test
206     public void testIO_356_Loop_UTF16() throws Exception {
207         testIO_356_Loop("UTF-16", 4);
208     }
209 
210     @Test
211     public void testIO_356_Loop_UTF8() throws Exception {
212         testIO_356_Loop("UTF-8", 4);
213     }
214 
215     @Test
216     public void testLargeBufferedRead_RequiredCharsets() throws IOException {
217         for (final String csName : getRequiredCharsetNames()) {
218             testBufferedRead(LARGE_TEST_STRING, csName);
219         }
220     }
221 
222     @Test
223     public void testLargeBufferedRead_UTF8() throws IOException {
224         testBufferedRead(LARGE_TEST_STRING, "UTF-8");
225     }
226 
227     @Test
228     public void testLargeSingleByteRead_RequiredCharsets() throws IOException {
229         for (final String csName : getRequiredCharsetNames()) {
230             testSingleByteRead(LARGE_TEST_STRING, csName);
231         }
232     }
233 
234     @Test
235     public void testLargeSingleByteRead_UTF8() throws IOException {
236         testSingleByteRead(LARGE_TEST_STRING, "UTF-8");
237     }
238 
239     // This test is broken for charsets that don't create a single byte for each char
240     private void testMarkReset(final String csName) throws Exception {
241         final InputStream r = new CharSequenceInputStream("test", csName);
242         try {
243             assertEquals(2, r.skip(2));
244             r.mark(0);
245             assertEquals(csName, 's', r.read());
246             assertEquals(csName, 't', r.read());
247             assertEquals(csName, -1, r.read());
248             r.reset();
249             assertEquals(csName, 's', r.read());
250             assertEquals(csName, 't', r.read());
251             assertEquals(csName, -1, r.read());
252             r.reset();
253             r.reset();
254         } finally {
255             r.close();
256         }
257     }
258 
259     @Test
260     @Ignore // Test broken for charsets that create multiple bytes for a single char
261     public void testMarkReset_RequiredCharsets() throws Exception {
262         for (final String csName : getRequiredCharsetNames()) {
263             testMarkReset(csName);
264         }
265     }
266 
267     @Test
268     public void testMarkReset_USASCII() throws Exception {
269         testMarkReset("US-ASCII");
270     }
271 
272     @Test
273     public void testMarkReset_UTF8() throws Exception {
274         testMarkReset("UTF-8");
275     }
276 
277     @Test
278     public void testMarkSupported() throws Exception {
279         final InputStream r = new CharSequenceInputStream("test", "UTF-8");
280         try {
281             assertTrue(r.markSupported());
282         } finally {
283             r.close();
284         }
285     }
286 
287     private void testReadZero(final String csName) throws Exception {
288         final InputStream r = new CharSequenceInputStream("test", csName);
289         try {
290             final byte[] bytes = new byte[30];
291             assertEquals(0, r.read(bytes, 0, 0));
292         } finally {
293             r.close();
294         }
295     }
296 
297     @Test
298     public void testReadZero_EmptyString() throws Exception {
299         final InputStream r = new CharSequenceInputStream("", "UTF-8");
300         try {
301             final byte[] bytes = new byte[30];
302             assertEquals(0, r.read(bytes, 0, 0));
303         } finally {
304             r.close();
305         }
306     }
307 
308     @Test
309     public void testReadZero_RequiredCharsets() throws Exception {
310         for (final String csName : getRequiredCharsetNames()) {
311             testReadZero(csName);
312         }
313     }
314 
315     private void testSingleByteRead(final String testString, final String charsetName) throws IOException {
316         final byte[] bytes = testString.getBytes(charsetName);
317         final InputStream in = new CharSequenceInputStream(testString, charsetName, 512);
318         try {
319             for (final byte b : bytes) {
320                 final int read = in.read();
321                 assertTrue("read " + read + " >=0 ", read >= 0);
322                 assertTrue("read " + read + " <= 255", read <= 255);
323                 assertEquals("Should agree with input", b, (byte) read);
324             }
325             assertEquals(-1, in.read());
326         } finally {
327             in.close();
328         }
329     }
330 
331     @Test
332     public void testSingleByteRead_RequiredCharsets() throws IOException {
333         for (final String csName : getRequiredCharsetNames()) {
334             testSingleByteRead(TEST_STRING, csName);
335         }
336     }
337 
338     @Test
339     public void testSingleByteRead_UTF16() throws IOException {
340         testSingleByteRead(TEST_STRING, "UTF-16");
341     }
342 
343     @Test
344     public void testSingleByteRead_UTF8() throws IOException {
345         testSingleByteRead(TEST_STRING, "UTF-8");
346     }
347 
348     // This is broken for charsets that don't map each char to a byte
349     private void testSkip(final String csName) throws Exception {
350         final InputStream r = new CharSequenceInputStream("test", csName);
351         try {
352             assertEquals(1, r.skip(1));
353             assertEquals(2, r.skip(2));
354             assertEquals(csName, 't', r.read());
355             r.skip(100);
356             assertEquals(csName, -1, r.read());
357         } finally {
358             r.close();
359         }
360     }
361 
362     @Test
363     @Ignore // test is broken for charsets that generate multiple bytes per char.
364     public void testSkip_RequiredCharsets() throws Exception {
365         for (final String csName : getRequiredCharsetNames()) {
366             testSkip(csName);
367         }
368     }
369 
370     @Test
371     public void testSkip_USASCII() throws Exception {
372         testSkip("US-ASCII");
373     }
374 
375     @Test
376     public void testSkip_UTF8() throws Exception {
377         testSkip("UTF-8");
378     }
379 
380     private int checkAvail(InputStream is, int min) throws Exception {
381         int available = is.available();
382         assertTrue("avail should be >= " + min + ", but was " + available, available >= min);
383         return available;
384     }
385 
386     private void testAvailableSkip(final String csName) throws Exception {
387         final String input = "test";
388         final InputStream r = new CharSequenceInputStream(input, csName);
389         try {
390             int available = checkAvail(r, input.length());
391             assertEquals(available - 1, r.skip(available-1)); // skip all but one
392             available = checkAvail(r, 1);
393             assertEquals(1, r.skip(1));
394             available = checkAvail(r, 0);
395         } finally {
396             r.close();
397         }
398     }
399 
400     private void testAvailableRead(final String csName) throws Exception {
401         final String input = "test";
402         final InputStream r = new CharSequenceInputStream(input, csName);
403         try {
404             int available = checkAvail(r, input.length());
405             byte buff[] = new byte[available];
406             assertEquals(available - 1, r.skip(available-1)); // skip all but one
407             available = checkAvail(r, 1);
408             buff = new byte[available];
409             assertEquals(available, r.read(buff, 0, available));
410         } finally {
411             r.close();
412         }
413     }
414 
415     @Test
416     public void testAvailable() throws Exception {
417         for (final String csName : Charset.availableCharsets().keySet()) {
418             // prevent java.lang.UnsupportedOperationException at sun.nio.cs.ext.ISO2022_CN.newEncoder.
419             // also try and avoid the following Effor on Continuum
420 //            java.lang.UnsupportedOperationException: null
421 //            at java.nio.CharBuffer.array(CharBuffer.java:940)
422 //            at sun.nio.cs.ext.COMPOUND_TEXT_Encoder.encodeLoop(COMPOUND_TEXT_Encoder.java:75)
423 //            at java.nio.charset.CharsetEncoder.encode(CharsetEncoder.java:544)
424 //            at org.apache.commons.io.input.CharSequenceInputStream.fillBuffer(CharSequenceInputStream.java:120)
425 //            at org.apache.commons.io.input.CharSequenceInputStream.read(CharSequenceInputStream.java:151)
426 //            at org.apache.commons.io.input.CharSequenceInputStreamTest.testAvailableRead(CharSequenceInputStreamTest.java:412)
427 //            at org.apache.commons.io.input.CharSequenceInputStreamTest.testAvailable(CharSequenceInputStreamTest.java:424)
428 
429             if (isAvailabilityTestableForCharset(csName)) {
430                 testAvailableSkip(csName);
431                 testAvailableRead(csName);
432             }
433         }
434     }
435 
436     private boolean isAvailabilityTestableForCharset(final String csName) {
437         return Charset.forName(csName).canEncode() && ! "COMPOUND_TEXT".equalsIgnoreCase(csName) && ! "x-COMPOUND_TEXT".equalsIgnoreCase(csName);
438     }
439 }