View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.io.input;
18  
19  import static org.apache.commons.io.StandardLineSeparator.CR;
20  import static org.apache.commons.io.StandardLineSeparator.LF;
21  import static org.junit.jupiter.api.Assertions.assertEquals;
22  import static org.junit.jupiter.api.Assertions.assertFalse;
23  import static org.junit.jupiter.api.Assertions.assertNull;
24  import static org.junit.jupiter.api.Assertions.assertThrows;
25  
26  import java.io.File;
27  import java.io.IOException;
28  import java.io.UnsupportedEncodingException;
29  import java.net.URISyntaxException;
30  import java.nio.charset.StandardCharsets;
31  import java.util.concurrent.atomic.AtomicInteger;
32  import java.util.function.Supplier;
33  import java.util.stream.IntStream;
34  
35  import org.apache.commons.io.TestResources;
36  import org.junit.jupiter.api.AfterEach;
37  import org.junit.jupiter.api.Test;
38  import org.junit.jupiter.params.ParameterizedTest;
39  import org.junit.jupiter.params.provider.MethodSource;
40  
41  /**
42   * Tests {@link ReversedLinesFileReader}.
43   */
44  public class ReversedLinesFileReaderParamBlockSizeTest {
45  
46      private static final String UTF_8 = StandardCharsets.UTF_8.name();
47      private static final String ISO_8859_1 = StandardCharsets.ISO_8859_1.name();
48  
49      // "A Test Line. Special chars: ÄäÜüÖöß Ãáéíïçñ ©µ¥£±²®"
50      private static final String TEST_LINE = "A Test Line. Special chars: "
51          + "\u00C4\u00E4\u00DC\u00FC\u00D6\u00F6\u00DF \u00C3\u00E1\u00E9\u00ED\u00EF\u00E7\u00F1\u00C2 \u00A9\u00B5\u00A5\u00A3\u00B1\u00B2\u00AE";
52  
53      // Hiragana letters: �����
54      private static final String TEST_LINE_SHIFT_JIS1 = "Hiragana letters: \u3041\u3042\u3043\u3044\u3045";
55  
56      // Strings are escaped in constants to avoid java source encoding issues (source file enc is UTF-8):
57  
58      // Kanji letters: 明輸�京
59      private static final String TEST_LINE_SHIFT_JIS2 = "Kanji letters: \u660E\u8F38\u5B50\u4EAC";
60      // windows-31j characters
61      private static final String TEST_LINE_WINDOWS_31J_1 = "\u3041\u3042\u3043\u3044\u3045";
62      private static final String TEST_LINE_WINDOWS_31J_2 = "\u660E\u8F38\u5B50\u4EAC";
63      // gbk characters (Simplified Chinese)
64      private static final String TEST_LINE_GBK_1 = "\u660E\u8F38\u5B50\u4EAC";
65      private static final String TEST_LINE_GBK_2 = "\u7B80\u4F53\u4E2D\u6587";
66      // x-windows-949 characters (Korean)
67      private static final String TEST_LINE_X_WINDOWS_949_1 = "\uD55C\uAD6D\uC5B4";
68      private static final String TEST_LINE_X_WINDOWS_949_2 = "\uB300\uD55C\uBBFC\uAD6D";
69      // x-windows-950 characters (Traditional Chinese)
70      private static final String TEST_LINE_X_WINDOWS_950_1 = "\u660E\u8F38\u5B50\u4EAC";
71      private static final String TEST_LINE_X_WINDOWS_950_2 = "\u7E41\u9AD4\u4E2D\u6587";
72  
73      static void assertEqualsAndNoLineBreaks(final String expected, final String actual) {
74          assertEqualsAndNoLineBreaks(expected, actual, null);
75      }
76  
77      static void assertEqualsAndNoLineBreaks(final String expected, final String actual, final Supplier<String> messageSupplier) {
78          if (actual != null) {
79              assertFalse(actual.contains(LF.getString()), "Line contains \\n: line=" + actual);
80              assertFalse(actual.contains(CR.getString()), "Line contains \\r: line=" + actual);
81          }
82          assertEquals(expected, actual, messageSupplier);
83      }
84  
85      /**
86       * Small and uneven block sizes are not used in reality but are good to show that the algorithm is solid.
87       */
88      public static IntStream blockSizes() {
89          return IntStream.of(1, 3, 8, 10, 256, 4096, 8192);
90      }
91  
92      private ReversedLinesFileReader reversedLinesFileReader;
93  
94      private void assertFileWithShrinkingTestLines(final ReversedLinesFileReader reversedLinesFileReader) throws IOException {
95          final AtomicInteger count = new AtomicInteger();
96          reversedLinesFileReader.forEach(
97                  line -> assertEqualsAndNoLineBreaks(TEST_LINE.substring(0, count.incrementAndGet()), line, () -> "Line " + count + " is not matching"));
98      }
99  
100     @AfterEach
101     public void closeReader() {
102         try {
103             if (reversedLinesFileReader != null) {
104                 reversedLinesFileReader.close();
105             }
106         } catch (final Exception e) {
107             // ignore
108         }
109     }
110 
111     @ParameterizedTest(name = "BlockSize={0}")
112     @MethodSource("blockSizes")
113     public void testEmptyFile(final int blockSize) throws URISyntaxException, IOException {
114         final File testFileEmpty = TestResources.getFile("/test-file-empty.bin");
115         try (ReversedLinesFileReader reader = new ReversedLinesFileReader(testFileEmpty, blockSize, UTF_8)) {
116             assertNull(reader.readLine());
117         }
118         try (ReversedLinesFileReader reader = ReversedLinesFileReader.builder().setFile(testFileEmpty).setBufferSize(blockSize).setCharset(UTF_8).get()) {
119             assertNull(reader.readLine());
120         }
121     }
122 
123     @Test
124     public void testFileSizeIsExactMultipleOfBlockSize() throws URISyntaxException, IOException {
125         final int blockSize = 10;
126         final File testFile20Bytes = TestResources.getFile("/test-file-20byteslength.bin");
127         reversedLinesFileReader = new ReversedLinesFileReader(testFile20Bytes, blockSize, ISO_8859_1);
128         assertEqualsAndNoLineBreaks("987654321", reversedLinesFileReader.readLine());
129         assertEqualsAndNoLineBreaks("123456789", reversedLinesFileReader.readLine());
130     }
131 
132     @ParameterizedTest(name = "BlockSize={0}")
133     @MethodSource("blockSizes")
134     public void testGBK(final int testParamBlockSize) throws URISyntaxException, IOException {
135         final File testFileGBK = TestResources.getFile("/test-file-gbk.bin");
136         reversedLinesFileReader = new ReversedLinesFileReader(testFileGBK, testParamBlockSize, "GBK");
137         assertEqualsAndNoLineBreaks(TEST_LINE_GBK_2, reversedLinesFileReader.readLine());
138         assertEqualsAndNoLineBreaks(TEST_LINE_GBK_1, reversedLinesFileReader.readLine());
139     }
140 
141     @ParameterizedTest(name = "BlockSize={0}")
142     @MethodSource("blockSizes")
143     public void testIsoFileDefaults(final int testParamBlockSize) throws URISyntaxException, IOException {
144         final File testFileIso = TestResources.getFile("/test-file-iso8859-1.bin");
145         reversedLinesFileReader = new ReversedLinesFileReader(testFileIso, testParamBlockSize, ISO_8859_1);
146         assertFileWithShrinkingTestLines(reversedLinesFileReader);
147     }
148 
149     @ParameterizedTest(name = "BlockSize={0}")
150     @MethodSource("blockSizes")
151     public void testIsoFileManyWindowsBreaksSmallBlockSize2VerifyBlockSpanningNewLines(final int testParamBlockSize) throws URISyntaxException, IOException {
152         final File testFileIso = TestResources.getFile("/test-file-iso8859-1-shortlines-win-linebr.bin");
153         reversedLinesFileReader = new ReversedLinesFileReader(testFileIso, testParamBlockSize, ISO_8859_1);
154 
155         for (int i = 3; i > 0; i--) {
156             for (int j = 1; j <= 3; j++) {
157                 assertEqualsAndNoLineBreaks("", reversedLinesFileReader.readLine());
158             }
159             assertEqualsAndNoLineBreaks("" + i, reversedLinesFileReader.readLine());
160         }
161     }
162 
163     @ParameterizedTest(name = "BlockSize={0}")
164     @MethodSource("blockSizes")
165     public void testShiftJISFile(final int testParamBlockSize) throws URISyntaxException, IOException {
166         final File testFileShiftJIS = TestResources.getFile("/test-file-shiftjis.bin");
167         reversedLinesFileReader = new ReversedLinesFileReader(testFileShiftJIS, testParamBlockSize, "Shift_JIS");
168         assertEqualsAndNoLineBreaks(TEST_LINE_SHIFT_JIS2, reversedLinesFileReader.readLine());
169         assertEqualsAndNoLineBreaks(TEST_LINE_SHIFT_JIS1, reversedLinesFileReader.readLine());
170     }
171 
172     @ParameterizedTest(name = "BlockSize={0}")
173     @MethodSource("blockSizes")
174     public void testUnsupportedEncodingBig5(final int testParamBlockSize) throws URISyntaxException {
175         final File testFileEncodingBig5 = TestResources.getFile("/test-file-empty.bin");
176         assertThrows(UnsupportedEncodingException.class,
177                 () -> new ReversedLinesFileReader(testFileEncodingBig5, testParamBlockSize, "Big5").close());
178     }
179 
180     @ParameterizedTest(name = "BlockSize={0}")
181     @MethodSource("blockSizes")
182     public void testUnsupportedEncodingUTF16(final int testParamBlockSize) throws URISyntaxException {
183         final File testFileEmpty = TestResources.getFile("/test-file-empty.bin");
184         assertThrows(UnsupportedEncodingException.class,
185                 () -> new ReversedLinesFileReader(testFileEmpty, testParamBlockSize, StandardCharsets.UTF_16.name()).close());
186     }
187 
188     @ParameterizedTest(name = "BlockSize={0}")
189     @MethodSource("blockSizes")
190     public void testUTF16BEFile(final int testParamBlockSize) throws URISyntaxException, IOException {
191         final File testFileUTF16BE = TestResources.getFile("/test-file-utf16be.bin");
192         reversedLinesFileReader = new ReversedLinesFileReader(testFileUTF16BE, testParamBlockSize, StandardCharsets.UTF_16BE.name());
193         assertFileWithShrinkingTestLines(reversedLinesFileReader);
194     }
195 
196     @ParameterizedTest(name = "BlockSize={0}")
197     @MethodSource("blockSizes")
198     public void testUTF16LEFile(final int testParamBlockSize) throws URISyntaxException, IOException {
199         final File testFileUTF16LE = TestResources.getFile("/test-file-utf16le.bin");
200         reversedLinesFileReader = new ReversedLinesFileReader(testFileUTF16LE, testParamBlockSize, StandardCharsets.UTF_16LE.name());
201         assertFileWithShrinkingTestLines(reversedLinesFileReader);
202     }
203 
204     @ParameterizedTest(name = "BlockSize={0}")
205     @MethodSource("blockSizes")
206     public void testUTF8File(final int testParamBlockSize) throws URISyntaxException, IOException {
207         final File testFileIso = TestResources.getFile("/test-file-utf8.bin");
208         reversedLinesFileReader = new ReversedLinesFileReader(testFileIso, testParamBlockSize, UTF_8);
209         assertFileWithShrinkingTestLines(reversedLinesFileReader);
210     }
211 
212     @ParameterizedTest(name = "BlockSize={0}")
213     @MethodSource("blockSizes")
214     public void testUTF8FileCRBreaks(final int testParamBlockSize) throws URISyntaxException, IOException {
215         final File testFileIso = TestResources.getFile("/test-file-utf8-cr-only.bin");
216         reversedLinesFileReader = new ReversedLinesFileReader(testFileIso, testParamBlockSize, UTF_8);
217         assertFileWithShrinkingTestLines(reversedLinesFileReader);
218     }
219 
220     @ParameterizedTest(name = "BlockSize={0}")
221     @MethodSource("blockSizes")
222     public void testUTF8FileWindowsBreaks(final int testParamBlockSize) throws URISyntaxException, IOException {
223         final File testFileIso = TestResources.getFile("/test-file-utf8-win-linebr.bin");
224         reversedLinesFileReader = new ReversedLinesFileReader(testFileIso, testParamBlockSize, UTF_8);
225         assertFileWithShrinkingTestLines(reversedLinesFileReader);
226     }
227 
228     @ParameterizedTest(name = "BlockSize={0}")
229     @MethodSource("blockSizes")
230     public void testUTF8FileWindowsBreaksSmallBlockSize2VerifyBlockSpanningNewLines(final int testParamBlockSize) throws URISyntaxException, IOException {
231         final File testFileUtf8 = TestResources.getFile("/test-file-utf8-win-linebr.bin");
232         reversedLinesFileReader = new ReversedLinesFileReader(testFileUtf8, testParamBlockSize, UTF_8);
233         assertFileWithShrinkingTestLines(reversedLinesFileReader);
234     }
235 
236     @ParameterizedTest(name = "BlockSize={0}")
237     @MethodSource("blockSizes")
238     public void testWindows31jFile(final int testParamBlockSize) throws URISyntaxException, IOException {
239         final File testFileWindows31J = TestResources.getFile("/test-file-windows-31j.bin");
240         reversedLinesFileReader = new ReversedLinesFileReader(testFileWindows31J, testParamBlockSize, "windows-31j");
241         assertEqualsAndNoLineBreaks(TEST_LINE_WINDOWS_31J_2, reversedLinesFileReader.readLine());
242         assertEqualsAndNoLineBreaks(TEST_LINE_WINDOWS_31J_1, reversedLinesFileReader.readLine());
243     }
244 
245     @ParameterizedTest(name = "BlockSize={0}")
246     @MethodSource("blockSizes")
247     public void testxWindows949File(final int testParamBlockSize) throws URISyntaxException, IOException {
248         final File testFilexWindows949 = TestResources.getFile("/test-file-x-windows-949.bin");
249         reversedLinesFileReader = new ReversedLinesFileReader(testFilexWindows949, testParamBlockSize, "x-windows-949");
250         assertEqualsAndNoLineBreaks(TEST_LINE_X_WINDOWS_949_2, reversedLinesFileReader.readLine());
251         assertEqualsAndNoLineBreaks(TEST_LINE_X_WINDOWS_949_1, reversedLinesFileReader.readLine());
252     }
253 
254     @ParameterizedTest(name = "BlockSize={0}")
255     @MethodSource("blockSizes")
256     public void testxWindows950File(final int testParamBlockSize) throws URISyntaxException, IOException {
257         final File testFilexWindows950 = TestResources.getFile("/test-file-x-windows-950.bin");
258         reversedLinesFileReader = new ReversedLinesFileReader(testFilexWindows950, testParamBlockSize, "x-windows-950");
259         assertEqualsAndNoLineBreaks(TEST_LINE_X_WINDOWS_950_2, reversedLinesFileReader.readLine());
260         assertEqualsAndNoLineBreaks(TEST_LINE_X_WINDOWS_950_1, reversedLinesFileReader.readLine());
261     }
262 }