View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.io.input;
18  
19  import static org.apache.commons.io.StandardLineSeparator.CR;
20  import static org.apache.commons.io.StandardLineSeparator.LF;
21  import static org.junit.jupiter.api.Assertions.assertEquals;
22  import static org.junit.jupiter.api.Assertions.assertFalse;
23  import static org.junit.jupiter.api.Assertions.assertNull;
24  import static org.junit.jupiter.api.Assertions.assertThrows;
25  
26  import java.io.File;
27  import java.io.IOException;
28  import java.io.UnsupportedEncodingException;
29  import java.net.URISyntaxException;
30  import java.nio.charset.StandardCharsets;
31  import java.util.stream.IntStream;
32  
33  import org.apache.commons.io.TestResources;
34  import org.junit.jupiter.api.AfterEach;
35  import org.junit.jupiter.api.Test;
36  import org.junit.jupiter.params.ParameterizedTest;
37  import org.junit.jupiter.params.provider.MethodSource;
38  
39  public class ReversedLinesFileReaderTestParamBlockSize {
40  
    // Charset names passed to the ReversedLinesFileReader constructors and builder in the tests below.
    private static final String UTF_8 = StandardCharsets.UTF_8.name();
    private static final String ISO_8859_1 = StandardCharsets.ISO_8859_1.name();

    // Non-ASCII characters in the constants below are written as Unicode escapes to avoid Java source encoding
    // issues (the source file encoding is UTF-8). Where a comment quotes the text, it shows the decoded characters.

    // "A Test Line. Special chars: ÄäÜüÖöß ÃáéíïçñÂ ©µ¥£±²®"
    private static final String TEST_LINE = "A Test Line. Special chars: "
        + "\u00C4\u00E4\u00DC\u00FC\u00D6\u00F6\u00DF \u00C3\u00E1\u00E9\u00ED\u00EF\u00E7\u00F1\u00C2 \u00A9\u00B5\u00A5\u00A3\u00B1\u00B2\u00AE";

    // Hiragana letters: ぁあぃいぅ
    private static final String TEST_LINE_SHIFT_JIS1 = "Hiragana letters: \u3041\u3042\u3043\u3044\u3045";

    // Kanji letters: 明輸子京
    private static final String TEST_LINE_SHIFT_JIS2 = "Kanji letters: \u660E\u8F38\u5B50\u4EAC";
    // windows-31j characters (the same Hiragana and Kanji characters as the Shift_JIS lines, without the ASCII prefix)
    private static final String TEST_LINE_WINDOWS_31J_1 = "\u3041\u3042\u3043\u3044\u3045";
    private static final String TEST_LINE_WINDOWS_31J_2 = "\u660E\u8F38\u5B50\u4EAC";
    // gbk characters (Simplified Chinese)
    private static final String TEST_LINE_GBK_1 = "\u660E\u8F38\u5B50\u4EAC";
    private static final String TEST_LINE_GBK_2 = "\u7B80\u4F53\u4E2D\u6587";
    // x-windows-949 characters (Korean)
    private static final String TEST_LINE_X_WINDOWS_949_1 = "\uD55C\uAD6D\uC5B4";
    private static final String TEST_LINE_X_WINDOWS_949_2 = "\uB300\uD55C\uBBFC\uAD6D";
    // x-windows-950 characters (Traditional Chinese)
    private static final String TEST_LINE_X_WINDOWS_950_1 = "\u660E\u8F38\u5B50\u4EAC";
    private static final String TEST_LINE_X_WINDOWS_950_2 = "\u7E41\u9AD4\u4E2D\u6587";
67  
68      static void assertEqualsAndNoLineBreaks(final String expected, final String actual) {
69          assertEqualsAndNoLineBreaks(null, expected, actual);
70      }
71  
72      static void assertEqualsAndNoLineBreaks(final String msg, final String expected, final String actual) {
73          if (actual != null) {
74              assertFalse(actual.contains(LF.getString()), "Line contains \\n: line=" + actual);
75              assertFalse(actual.contains(CR.getString()), "Line contains \\r: line=" + actual);
76          }
77          assertEquals(expected, actual, msg);
78      }
79  
80      // small and uneven block sizes are not used in reality but are good to show that the algorithm is solid
81      public static IntStream blockSizes() {
82          return IntStream.of(1, 3, 8, 256, 4096);
83      }
84  
    // Reader under test, assigned by the tests that do not use try-with-resources; closeReader() closes it after each test.
    private ReversedLinesFileReader reversedLinesFileReader;
86  
87      private void assertFileWithShrinkingTestLines(final ReversedLinesFileReader reversedLinesFileReader) throws IOException {
88          String line = null;
89          int lineCount = 0;
90          while ((line = reversedLinesFileReader.readLine()) != null) {
91              lineCount++;
92              assertEqualsAndNoLineBreaks("Line " + lineCount + " is not matching", TEST_LINE.substring(0, lineCount), line);
93          }
94      }
95  
96      @AfterEach
97      public void closeReader() {
98          try {
99              if (reversedLinesFileReader != null) {
100                 reversedLinesFileReader.close();
101             }
102         } catch (final Exception e) {
103             // ignore
104         }
105     }
106 
107     @ParameterizedTest(name = "BlockSize={0}")
108     @MethodSource("blockSizes")
109     public void testEmptyFile(final int blockSize) throws URISyntaxException, IOException {
110         final File testFileEmpty = TestResources.getFile("/test-file-empty.bin");
111         try (ReversedLinesFileReader reader = new ReversedLinesFileReader(testFileEmpty, blockSize, UTF_8)) {
112             assertNull(reader.readLine());
113         }
114         try (ReversedLinesFileReader reader = ReversedLinesFileReader.builder().setFile(testFileEmpty).setBufferSize(blockSize).setCharset(UTF_8).get()) {
115             assertNull(reader.readLine());
116         }
117     }
118 
119     @Test
120     public void testFileSizeIsExactMultipleOfBlockSize() throws URISyntaxException, IOException {
121         final int blockSize = 10;
122         final File testFile20Bytes = TestResources.getFile("/test-file-20byteslength.bin");
123         reversedLinesFileReader = new ReversedLinesFileReader(testFile20Bytes, blockSize, ISO_8859_1);
124         assertEqualsAndNoLineBreaks("987654321", reversedLinesFileReader.readLine());
125         assertEqualsAndNoLineBreaks("123456789", reversedLinesFileReader.readLine());
126     }
127 
128     @ParameterizedTest(name = "BlockSize={0}")
129     @MethodSource("blockSizes")
130     public void testGBK(final int testParamBlockSize) throws URISyntaxException, IOException {
131         final File testFileGBK = TestResources.getFile("/test-file-gbk.bin");
132         reversedLinesFileReader = new ReversedLinesFileReader(testFileGBK, testParamBlockSize, "GBK");
133         assertEqualsAndNoLineBreaks(TEST_LINE_GBK_2, reversedLinesFileReader.readLine());
134         assertEqualsAndNoLineBreaks(TEST_LINE_GBK_1, reversedLinesFileReader.readLine());
135     }
136 
137     @ParameterizedTest(name = "BlockSize={0}")
138     @MethodSource("blockSizes")
139     public void testIsoFileDefaults(final int testParamBlockSize) throws URISyntaxException, IOException {
140         final File testFileIso = TestResources.getFile("/test-file-iso8859-1.bin");
141         reversedLinesFileReader = new ReversedLinesFileReader(testFileIso, testParamBlockSize, ISO_8859_1);
142         assertFileWithShrinkingTestLines(reversedLinesFileReader);
143     }
144 
145     @ParameterizedTest(name = "BlockSize={0}")
146     @MethodSource("blockSizes")
147     public void testIsoFileManyWindowsBreaksSmallBlockSize2VerifyBlockSpanningNewLines(final int testParamBlockSize) throws URISyntaxException, IOException {
148         final File testFileIso = TestResources.getFile("/test-file-iso8859-1-shortlines-win-linebr.bin");
149         reversedLinesFileReader = new ReversedLinesFileReader(testFileIso, testParamBlockSize, ISO_8859_1);
150 
151         for (int i = 3; i > 0; i--) {
152             for (int j = 1; j <= 3; j++) {
153                 assertEqualsAndNoLineBreaks("", reversedLinesFileReader.readLine());
154             }
155             assertEqualsAndNoLineBreaks("" + i, reversedLinesFileReader.readLine());
156         }
157     }
158 
159     @ParameterizedTest(name = "BlockSize={0}")
160     @MethodSource("blockSizes")
161     public void testShiftJISFile(final int testParamBlockSize) throws URISyntaxException, IOException {
162         final File testFileShiftJIS = TestResources.getFile("/test-file-shiftjis.bin");
163         reversedLinesFileReader = new ReversedLinesFileReader(testFileShiftJIS, testParamBlockSize, "Shift_JIS");
164         assertEqualsAndNoLineBreaks(TEST_LINE_SHIFT_JIS2, reversedLinesFileReader.readLine());
165         assertEqualsAndNoLineBreaks(TEST_LINE_SHIFT_JIS1, reversedLinesFileReader.readLine());
166     }
167 
168     @ParameterizedTest(name = "BlockSize={0}")
169     @MethodSource("blockSizes")
170     public void testUnsupportedEncodingBig5(final int testParamBlockSize) throws URISyntaxException {
171         final File testFileEncodingBig5 = TestResources.getFile("/test-file-empty.bin");
172         assertThrows(UnsupportedEncodingException.class,
173                 () -> new ReversedLinesFileReader(testFileEncodingBig5, testParamBlockSize, "Big5").close());
174     }
175 
176     @ParameterizedTest(name = "BlockSize={0}")
177     @MethodSource("blockSizes")
178     public void testUnsupportedEncodingUTF16(final int testParamBlockSize) throws URISyntaxException {
179         final File testFileEmpty = TestResources.getFile("/test-file-empty.bin");
180         assertThrows(UnsupportedEncodingException.class,
181                 () -> new ReversedLinesFileReader(testFileEmpty, testParamBlockSize, StandardCharsets.UTF_16.name()).close());
182     }
183 
184     @ParameterizedTest(name = "BlockSize={0}")
185     @MethodSource("blockSizes")
186     public void testUTF16BEFile(final int testParamBlockSize) throws URISyntaxException, IOException {
187         final File testFileUTF16BE = TestResources.getFile("/test-file-utf16be.bin");
188         reversedLinesFileReader = new ReversedLinesFileReader(testFileUTF16BE, testParamBlockSize, StandardCharsets.UTF_16BE.name());
189         assertFileWithShrinkingTestLines(reversedLinesFileReader);
190     }
191 
192     @ParameterizedTest(name = "BlockSize={0}")
193     @MethodSource("blockSizes")
194     public void testUTF16LEFile(final int testParamBlockSize) throws URISyntaxException, IOException {
195         final File testFileUTF16LE = TestResources.getFile("/test-file-utf16le.bin");
196         reversedLinesFileReader = new ReversedLinesFileReader(testFileUTF16LE, testParamBlockSize, StandardCharsets.UTF_16LE.name());
197         assertFileWithShrinkingTestLines(reversedLinesFileReader);
198     }
199 
200     @ParameterizedTest(name = "BlockSize={0}")
201     @MethodSource("blockSizes")
202     public void testUTF8File(final int testParamBlockSize) throws URISyntaxException, IOException {
203         final File testFileIso = TestResources.getFile("/test-file-utf8.bin");
204         reversedLinesFileReader = new ReversedLinesFileReader(testFileIso, testParamBlockSize, UTF_8);
205         assertFileWithShrinkingTestLines(reversedLinesFileReader);
206     }
207 
208     @ParameterizedTest(name = "BlockSize={0}")
209     @MethodSource("blockSizes")
210     public void testUTF8FileCRBreaks(final int testParamBlockSize) throws URISyntaxException, IOException {
211         final File testFileIso = TestResources.getFile("/test-file-utf8-cr-only.bin");
212         reversedLinesFileReader = new ReversedLinesFileReader(testFileIso, testParamBlockSize, UTF_8);
213         assertFileWithShrinkingTestLines(reversedLinesFileReader);
214     }
215 
216     @ParameterizedTest(name = "BlockSize={0}")
217     @MethodSource("blockSizes")
218     public void testUTF8FileWindowsBreaks(final int testParamBlockSize) throws URISyntaxException, IOException {
219         final File testFileIso = TestResources.getFile("/test-file-utf8-win-linebr.bin");
220         reversedLinesFileReader = new ReversedLinesFileReader(testFileIso, testParamBlockSize, UTF_8);
221         assertFileWithShrinkingTestLines(reversedLinesFileReader);
222     }
223 
224     @ParameterizedTest(name = "BlockSize={0}")
225     @MethodSource("blockSizes")
226     public void testUTF8FileWindowsBreaksSmallBlockSize2VerifyBlockSpanningNewLines(final int testParamBlockSize) throws URISyntaxException, IOException {
227         final File testFileUtf8 = TestResources.getFile("/test-file-utf8-win-linebr.bin");
228         reversedLinesFileReader = new ReversedLinesFileReader(testFileUtf8, testParamBlockSize, UTF_8);
229         assertFileWithShrinkingTestLines(reversedLinesFileReader);
230     }
231 
232     @ParameterizedTest(name = "BlockSize={0}")
233     @MethodSource("blockSizes")
234     public void testWindows31jFile(final int testParamBlockSize) throws URISyntaxException, IOException {
235         final File testFileWindows31J = TestResources.getFile("/test-file-windows-31j.bin");
236         reversedLinesFileReader = new ReversedLinesFileReader(testFileWindows31J, testParamBlockSize, "windows-31j");
237         assertEqualsAndNoLineBreaks(TEST_LINE_WINDOWS_31J_2, reversedLinesFileReader.readLine());
238         assertEqualsAndNoLineBreaks(TEST_LINE_WINDOWS_31J_1, reversedLinesFileReader.readLine());
239     }
240 
241     @ParameterizedTest(name = "BlockSize={0}")
242     @MethodSource("blockSizes")
243     public void testxWindows949File(final int testParamBlockSize) throws URISyntaxException, IOException {
244         final File testFilexWindows949 = TestResources.getFile("/test-file-x-windows-949.bin");
245         reversedLinesFileReader = new ReversedLinesFileReader(testFilexWindows949, testParamBlockSize, "x-windows-949");
246         assertEqualsAndNoLineBreaks(TEST_LINE_X_WINDOWS_949_2, reversedLinesFileReader.readLine());
247         assertEqualsAndNoLineBreaks(TEST_LINE_X_WINDOWS_949_1, reversedLinesFileReader.readLine());
248     }
249 
250     @ParameterizedTest(name = "BlockSize={0}")
251     @MethodSource("blockSizes")
252     public void testxWindows950File(final int testParamBlockSize) throws URISyntaxException, IOException {
253         final File testFilexWindows950 = TestResources.getFile("/test-file-x-windows-950.bin");
254         reversedLinesFileReader = new ReversedLinesFileReader(testFilexWindows950, testParamBlockSize, "x-windows-950");
255         assertEqualsAndNoLineBreaks(TEST_LINE_X_WINDOWS_950_2, reversedLinesFileReader.readLine());
256         assertEqualsAndNoLineBreaks(TEST_LINE_X_WINDOWS_950_1, reversedLinesFileReader.readLine());
257     }
258 }