1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.commons.csv;
19
20 import static org.apache.commons.csv.Constants.CR;
21 import static org.apache.commons.csv.Constants.CRLF;
22 import static org.apache.commons.csv.Constants.LF;
23 import static org.junit.jupiter.api.Assertions.assertArrayEquals;
24 import static org.junit.jupiter.api.Assertions.assertEquals;
25 import static org.junit.jupiter.api.Assertions.assertFalse;
26 import static org.junit.jupiter.api.Assertions.assertNotNull;
27 import static org.junit.jupiter.api.Assertions.assertNull;
28 import static org.junit.jupiter.api.Assertions.assertThrows;
29 import static org.junit.jupiter.api.Assertions.assertTrue;
30
31 import java.io.File;
32 import java.io.IOException;
33 import java.io.InputStreamReader;
34 import java.io.PipedReader;
35 import java.io.PipedWriter;
36 import java.io.Reader;
37 import java.io.StringReader;
38 import java.io.StringWriter;
39 import java.io.UncheckedIOException;
40 import java.net.URL;
41 import java.nio.charset.Charset;
42 import java.nio.charset.StandardCharsets;
43 import java.nio.file.Files;
44 import java.nio.file.Path;
45 import java.nio.file.Paths;
46 import java.util.ArrayList;
47 import java.util.Arrays;
48 import java.util.Iterator;
49 import java.util.List;
50 import java.util.Map;
51 import java.util.NoSuchElementException;
52 import java.util.stream.Collectors;
53
54 import org.apache.commons.io.input.BOMInputStream;
55 import org.apache.commons.io.input.BrokenInputStream;
56 import org.junit.jupiter.api.Assertions;
57 import org.junit.jupiter.api.Disabled;
58 import org.junit.jupiter.api.Test;
59
60
61
62
63
64
65
66
67 public class CSVParserTest {
68
/** UTF-8 charset shared by the tests in this class. */
private static final Charset UTF_8 = StandardCharsets.UTF_8;

/** Canonical name of {@link #UTF_8}, for APIs that take a charset name. */
private static final String UTF_8_NAME = UTF_8.name();

/** Multi-record CSV fixture; {@link #RESULT} holds the values expected when surrounding spaces are ignored. */
private static final String CSV_INPUT = "a,b,c,d\n" + " a , b , 1 2 \n" + "\"foo baar\", b,\n"

        + " \"foo\n,,\n\"\",,\n\"\"\",d,e\n";

/** Single-record fixture without a trailing record separator; parses to {@code RESULT[0]}. */
private static final String CSV_INPUT_1 = "a,b,c,d";

/** Single-record fixture without a trailing record separator; parses to {@code RESULT[1]}. */
private static final String CSV_INPUT_2 = "a,b,1 2";

/** Expected record values, one row per record, for the fixtures above. */
private static final String[][] RESULT = {{"a", "b", "c", "d"}, {"a", "b", "1 2"}, {"foo baar", "b", ""}, {"foo\n,,\n\",,\n\"", "d", "e"}};
82
83
84 static private final String CSV_INPUT_NO_COMMENT = "A,B"+CRLF+"1,2"+CRLF;
85
86
87 static private final String CSV_INPUT_HEADER_COMMENT = "# header comment" + CRLF + "A,B" + CRLF + "1,2" + CRLF;
88
89
90 static private final String CSV_INPUT_HEADER_TRAILER_COMMENT = "# header comment" + CRLF + "A,B" + CRLF + "1,2" + CRLF + "# comment";
91
92
93 static private final String CSV_INPUT_MULTILINE_HEADER_TRAILER_COMMENT = "# multi-line" + CRLF + "# header comment" + CRLF + "A,B" + CRLF + "1,2" + CRLF + "# multi-line" + CRLF + "# comment";
94
95
96 static private final CSVFormat FORMAT_AUTO_HEADER = CSVFormat.Builder.create(CSVFormat.DEFAULT).setCommentMarker('#').setHeader().build();
97
98
99 static private final CSVFormat FORMAT_EXPLICIT_HEADER = CSVFormat.Builder.create(CSVFormat.DEFAULT)
100 .setSkipHeaderRecord(true)
101 .setCommentMarker('#')
102 .setHeader("A", "B")
103 .build();
104
105
106 CSVFormat FORMAT_EXPLICIT_HEADER_NOSKIP = CSVFormat.Builder.create(CSVFormat.DEFAULT)
107 .setCommentMarker('#')
108 .setHeader("A", "B")
109 .build();
110
111 @SuppressWarnings("resource")
112 private BOMInputStream createBOMInputStream(final String resource) throws IOException {
113 return new BOMInputStream(ClassLoader.getSystemClassLoader().getResource(resource).openStream());
114 }
115
116 CSVRecord parse(final CSVParser parser, final int failParseRecordNo) throws IOException {
117 if (parser.getRecordNumber() + 1 == failParseRecordNo) {
118 assertThrows(IOException.class, () -> parser.nextRecord());
119 return null;
120 }
121 return parser.nextRecord();
122 }
123
124 private void parseFully(final CSVParser parser) {
125 parser.forEach(Assertions::assertNotNull);
126 }
127
/**
 * Parses a fixture using {@code ','} as delimiter, {@code '\''} as quote and {@code '/'} as escape character,
 * and compares every parsed record against the expected values in {@code res}.
 */
@Test
public void testBackslashEscaping() throws IOException {
    // Each input line below corresponds, in order, to one row of res.
    final String code = "one,two,three\n" // 0) plain values
        + "'',''\n" // 1) empty encapsulated values
        + "/',/'\n" // 2) escaped quote character outside encapsulation
        + "'/'','/''\n" // 3) escaped quote character inside encapsulation
        + "'''',''''\n" // 4) doubled quote character inside encapsulation
        + "/,,/,\n" // 5) escaped delimiter
        + "//,//\n" // 6) escape escaped
        + "'//','//'\n" // 7) escape escaped in encapsulation
        + " 8 , \"quoted \"\" /\" // string\" \n" // don't eat spaces
        + "9, /\n \n" // escaped line break is expected to stay inside the value
        + "";
    final String[][] res = {{"one", "two", "three"},
        {"", ""},
        {"'", "'"},
        {"'", "'"},
        {"'", "'"},
        {",", ","},
        {"/", "/"},
        {"/", "/"},
        {" 8 ", " \"quoted \"\" /\" / string\" "}, {"9", " \n "},};

    // Built from scratch with newFormat so only the options set here are active.
    final CSVFormat format = CSVFormat.newFormat(',').withQuote('\'').withRecordSeparator(CRLF).withEscape('/').withIgnoreEmptyLines();

    try (final CSVParser parser = CSVParser.parse(code, format)) {
        final List<CSVRecord> records = parser.getRecords();
        assertFalse(records.isEmpty());

        Utils.compare("Records do not match expected result", res, records);
    }
}
165
/**
 * Parses a fixture using {@code ','} as delimiter and {@code '/'} as escape character, with no quote character
 * configured, and compares every parsed record against the expected values in {@code res}.
 */
@Test
public void testBackslashEscaping2() throws IOException {
    // Each input line below corresponds, in order, to one row of res.
    final String code = "" + " , , \n" // 1) space-only values are kept as-is
        + " \t , , \n" // 2) tab inside a value is kept as-is
        + " // , /, , /,\n" // 3)
        + "";
    final String[][] res = {{" ", " ", " "},
        {" \t ", " ", " "},
        {" / ", " , ", " ,"},
    };

    final CSVFormat format = CSVFormat.newFormat(',').withRecordSeparator(CRLF).withEscape('/').withIgnoreEmptyLines();

    try (final CSVParser parser = CSVParser.parse(code, format)) {
        final List<CSVRecord> records = parser.getRecords();
        assertFalse(records.isEmpty());

        Utils.compare("", res, records);
    }
}
191
/**
 * Disabled test (no reason is recorded on the annotation) that parses backslash-laden input with
 * {@link CSVFormat#DEFAULT} and compares each record's values against {@code res}.
 */
@Test
@Disabled
public void testBackslashEscapingOld() throws IOException {
    // The input records and the rows of res are listed in the same order.
    final String code = "one,two,three\n" + "on\\\"e,two\n" + "on\"e,two\n" + "one,\"tw\\\"o\"\n" + "one,\"t\\,wo\"\n" + "one,two,\"th,ree\"\n"
        + "\"a\\\\\"\n" + "a\\,b\n" + "\"a\\\\,b\"";
    final String[][] res = {{"one", "two", "three"}, {"on\\\"e", "two"}, {"on\"e", "two"}, {"one", "tw\"o"}, {"one", "t\\,wo"},

        {"one", "two", "th,ree"}, {"a\\\\"},
        {"a\\", "b"},
        {"a\\\\,b"}
    };
    try (final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT)) {
        final List<CSVRecord> records = parser.getRecords();
        assertEquals(res.length, records.size());
        assertFalse(records.isEmpty());
        for (int i = 0; i < res.length; i++) {
            assertArrayEquals(res[i], records.get(i).values());
        }
    }
}
212
213 @Test
214 @Disabled("CSV-107")
215 public void testBOM() throws IOException {
216 final URL url = ClassLoader.getSystemClassLoader().getResource("org/apache/commons/csv/CSVFileParser/bom.csv");
217 try (final CSVParser parser = CSVParser.parse(url, Charset.forName(UTF_8_NAME), CSVFormat.EXCEL.withHeader())) {
218 parser.forEach(record -> assertNotNull(record.get("Date")));
219 }
220 }
221
222 @Test
223 public void testBOMInputStream_ParserWithInputStream() throws IOException {
224 try (final BOMInputStream inputStream = createBOMInputStream("org/apache/commons/csv/CSVFileParser/bom.csv");
225 final CSVParser parser = CSVParser.parse(inputStream, UTF_8, CSVFormat.EXCEL.withHeader())) {
226 parser.forEach(record -> assertNotNull(record.get("Date")));
227 }
228 }
229
230 @Test
231 public void testBOMInputStream_ParserWithReader() throws IOException {
232 try (final Reader reader = new InputStreamReader(createBOMInputStream("org/apache/commons/csv/CSVFileParser/bom.csv"), UTF_8_NAME);
233 final CSVParser parser = new CSVParser(reader, CSVFormat.EXCEL.withHeader())) {
234 parser.forEach(record -> assertNotNull(record.get("Date")));
235 }
236 }
237
238 @Test
239 public void testBOMInputStream_parseWithReader() throws IOException {
240 try (final Reader reader = new InputStreamReader(createBOMInputStream("org/apache/commons/csv/CSVFileParser/bom.csv"), UTF_8_NAME);
241 final CSVParser parser = CSVParser.parse(reader, CSVFormat.EXCEL.withHeader())) {
242 parser.forEach(record -> assertNotNull(record.get("Date")));
243 }
244 }
245
246 @Test
247 public void testCarriageReturnEndings() throws IOException {
248 final String code = "foo\rbaar,\rhello,world\r,kanu";
249 try (final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT)) {
250 final List<CSVRecord> records = parser.getRecords();
251 assertEquals(4, records.size());
252 }
253 }
254
255 @Test
256 public void testCarriageReturnLineFeedEndings() throws IOException {
257 final String code = "foo\r\nbaar,\r\nhello,world\r\n,kanu";
258 try (final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT)) {
259 final List<CSVRecord> records = parser.getRecords();
260 assertEquals(4, records.size());
261 }
262 }
263
264 @Test
265 public void testClose() throws Exception {
266 final Reader in = new StringReader("# comment\na,b,c\n1,2,3\nx,y,z");
267 final Iterator<CSVRecord> records;
268 try (final CSVParser parser = CSVFormat.DEFAULT.withCommentMarker('#').withHeader().parse(in)) {
269 records = parser.iterator();
270 assertTrue(records.hasNext());
271 }
272 assertFalse(records.hasNext());
273 assertThrows(NoSuchElementException.class, records::next);
274 }
275
276 @Test
277 public void testCSV141CSVFormat_DEFAULT() throws Exception {
278 testCSV141Failure(CSVFormat.DEFAULT, 3);
279 }
280
281 @Test
282 public void testCSV141CSVFormat_INFORMIX_UNLOAD() throws Exception {
283 testCSV141Failure(CSVFormat.INFORMIX_UNLOAD, 1);
284 }
285
286 @Test
287 public void testCSV141CSVFormat_INFORMIX_UNLOAD_CSV() throws Exception {
288 testCSV141Failure(CSVFormat.INFORMIX_UNLOAD_CSV, 3);
289 }
290
291 @Test
292 public void testCSV141CSVFormat_ORACLE() throws Exception {
293 testCSV141Failure(CSVFormat.ORACLE, 2);
294 }
295
296
297 @Test
298 public void testCSV141CSVFormat_POSTGRESQL_CSV() throws Exception {
299 testCSV141Failure(CSVFormat.POSTGRESQL_CSV, 3);
300 }
301
302 @Test
303 @Disabled("PR 295 does not work")
304 public void testCSV141Excel() throws Exception {
305 testCSV141Ok(CSVFormat.EXCEL);
306 }
307
308 private void testCSV141Failure(final CSVFormat format, final int failParseRecordNo) throws IOException {
309 final Path path = Paths.get("src/test/resources/org/apache/commons/csv/CSV-141/csv-141.csv");
310 try (final CSVParser parser = CSVParser.parse(path, StandardCharsets.UTF_8, format)) {
311
312 CSVRecord record = parse(parser, failParseRecordNo);
313 if (record == null) {
314 return;
315 }
316 assertEquals("1414770317901", record.get(0));
317 assertEquals("android.widget.EditText", record.get(1));
318 assertEquals("pass sem1 _84*|*", record.get(2));
319 assertEquals("0", record.get(3));
320 assertEquals("pass sem1 _8", record.get(4));
321 assertEquals(5, record.size());
322
323 record = parse(parser, failParseRecordNo);
324 if (record == null) {
325 return;
326 }
327 assertEquals("1414770318470", record.get(0));
328 assertEquals("android.widget.EditText", record.get(1));
329 assertEquals("pass sem1 _84:|", record.get(2));
330 assertEquals("0", record.get(3));
331 assertEquals("pass sem1 _84:\\", record.get(4));
332 assertEquals(5, record.size());
333
334 assertThrows(IOException.class, () -> parser.nextRecord());
335 }
336 }
337
338 private void testCSV141Ok(final CSVFormat format) throws IOException {
339 final Path path = Paths.get("src/test/resources/org/apache/commons/csv/CSV-141/csv-141.csv");
340 try (final CSVParser parser = CSVParser.parse(path, StandardCharsets.UTF_8, format)) {
341
342 CSVRecord record = parser.nextRecord();
343 assertEquals("1414770317901", record.get(0));
344 assertEquals("android.widget.EditText", record.get(1));
345 assertEquals("pass sem1 _84*|*", record.get(2));
346 assertEquals("0", record.get(3));
347 assertEquals("pass sem1 _8", record.get(4));
348 assertEquals(5, record.size());
349
350 record = parser.nextRecord();
351 assertEquals("1414770318470", record.get(0));
352 assertEquals("android.widget.EditText", record.get(1));
353 assertEquals("pass sem1 _84:|", record.get(2));
354 assertEquals("0", record.get(3));
355 assertEquals("pass sem1 _84:\\", record.get(4));
356 assertEquals(5, record.size());
357
358 record = parser.nextRecord();
359 assertEquals("1414770318327", record.get(0));
360 assertEquals("android.widget.EditText", record.get(1));
361 assertEquals("pass sem1", record.get(2));
362 assertEquals(3, record.size());
363
364 record = parser.nextRecord();
365 assertEquals("1414770318628", record.get(0));
366 assertEquals("android.widget.EditText", record.get(1));
367 assertEquals("pass sem1 _84*|*", record.get(2));
368 assertEquals("0", record.get(3));
369 assertEquals("pass sem1", record.get(4));
370 assertEquals(5, record.size());
371 }
372 }
373
374 @Test
375 public void testCSV141RFC4180() throws Exception {
376 testCSV141Failure(CSVFormat.RFC4180, 3);
377 }
378
379 @Test
380 public void testCSV235() throws IOException {
381 final String dqString = "\"aaa\",\"b\"\"bb\",\"ccc\"";
382 try (final CSVParser parser = CSVFormat.RFC4180.parse(new StringReader(dqString))) {
383 final Iterator<CSVRecord> records = parser.iterator();
384 final CSVRecord record = records.next();
385 assertFalse(records.hasNext());
386 assertEquals(3, record.size());
387 assertEquals("aaa", record.get(0));
388 assertEquals("b\"bb", record.get(1));
389 assertEquals("ccc", record.get(2));
390 }
391 }
392
393 @Test
394 public void testCSV57() throws Exception {
395 try (final CSVParser parser = CSVParser.parse("", CSVFormat.DEFAULT)) {
396 final List<CSVRecord> list = parser.getRecords();
397 assertNotNull(list);
398 assertEquals(0, list.size());
399 }
400 }
401
402 @Test
403 public void testDefaultFormat() throws IOException {
404 final String code = "" + "a,b#\n"
405 + "\"\n\",\" \",#\n"
406 + "#,\"\"\n"
407 + "# Final comment\n"
408 ;
409 final String[][] res = {{"a", "b#"}, {"\n", " ", "#"}, {"#", ""}, {"# Final comment"}};
410
411 CSVFormat format = CSVFormat.DEFAULT;
412 assertFalse(format.isCommentMarkerSet());
413 final String[][] res_comments = {{"a", "b#"}, {"\n", " ", "#"},};
414
415 try (final CSVParser parser = CSVParser.parse(code, format)) {
416 final List<CSVRecord> records = parser.getRecords();
417 assertFalse(records.isEmpty());
418
419 Utils.compare("Failed to parse without comments", res, records);
420
421 format = CSVFormat.DEFAULT.withCommentMarker('#');
422 }
423 try (final CSVParser parser = CSVParser.parse(code, format)) {
424 final List<CSVRecord> records = parser.getRecords();
425
426 Utils.compare("Failed to parse with comments", res_comments, records);
427 }
428 }
429
430 @Test
431 public void testDuplicateHeadersAllowedByDefault() throws Exception {
432 try (CSVParser parser = CSVParser.parse("a,b,a\n1,2,3\nx,y,z", CSVFormat.DEFAULT.withHeader())) {
433
434 }
435 }
436
437 @Test
438 public void testDuplicateHeadersNotAllowed() {
439 assertThrows(IllegalArgumentException.class,
440 () -> CSVParser.parse("a,b,a\n1,2,3\nx,y,z", CSVFormat.DEFAULT.withHeader().withAllowDuplicateHeaderNames(false)));
441 }
442
443 @Test
444 public void testEmptyFile() throws Exception {
445 try (final CSVParser parser = CSVParser.parse(Paths.get("src/test/resources/org/apache/commons/csv/empty.txt"), StandardCharsets.UTF_8,
446 CSVFormat.DEFAULT)) {
447 assertNull(parser.nextRecord());
448 }
449 }
450
451 @Test
452 public void testEmptyFileHeaderParsing() throws Exception {
453 try (final CSVParser parser = CSVParser.parse("", CSVFormat.DEFAULT.withFirstRecordAsHeader())) {
454 assertNull(parser.nextRecord());
455 assertTrue(parser.getHeaderNames().isEmpty());
456 }
457 }
458
459 @Test
460 public void testEmptyLineBehaviorCSV() throws Exception {
461 final String[] codes = {"hello,\r\n\r\n\r\n", "hello,\n\n\n", "hello,\"\"\r\n\r\n\r\n", "hello,\"\"\n\n\n"};
462 final String[][] res = {{"hello", ""}
463 };
464 for (final String code : codes) {
465 try (final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT)) {
466 final List<CSVRecord> records = parser.getRecords();
467 assertEquals(res.length, records.size());
468 assertFalse(records.isEmpty());
469 for (int i = 0; i < res.length; i++) {
470 assertArrayEquals(res[i], records.get(i).values());
471 }
472 }
473 }
474 }
475
476 @Test
477 public void testEmptyLineBehaviorExcel() throws Exception {
478 final String[] codes = {"hello,\r\n\r\n\r\n", "hello,\n\n\n", "hello,\"\"\r\n\r\n\r\n", "hello,\"\"\n\n\n"};
479 final String[][] res = {{"hello", ""}, {""},
480 {""}};
481 for (final String code : codes) {
482 try (final CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL)) {
483 final List<CSVRecord> records = parser.getRecords();
484 assertEquals(res.length, records.size());
485 assertFalse(records.isEmpty());
486 for (int i = 0; i < res.length; i++) {
487 assertArrayEquals(res[i], records.get(i).values());
488 }
489 }
490 }
491 }
492
493 @Test
494 public void testEmptyString() throws Exception {
495 try (final CSVParser parser = CSVParser.parse("", CSVFormat.DEFAULT)) {
496 assertNull(parser.nextRecord());
497 }
498 }
499
500 @Test
501 public void testEndOfFileBehaviorCSV() throws Exception {
502 final String[] codes = {"hello,\r\n\r\nworld,\r\n", "hello,\r\n\r\nworld,", "hello,\r\n\r\nworld,\"\"\r\n", "hello,\r\n\r\nworld,\"\"",
503 "hello,\r\n\r\nworld,\n", "hello,\r\n\r\nworld,", "hello,\r\n\r\nworld,\"\"\n", "hello,\r\n\r\nworld,\"\""};
504 final String[][] res = {{"hello", ""},
505 {"world", ""}};
506 for (final String code : codes) {
507 try (final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT)) {
508 final List<CSVRecord> records = parser.getRecords();
509 assertEquals(res.length, records.size());
510 assertFalse(records.isEmpty());
511 for (int i = 0; i < res.length; i++) {
512 assertArrayEquals(res[i], records.get(i).values());
513 }
514 }
515 }
516 }
517
518 @Test
519 public void testEndOfFileBehaviorExcel() throws Exception {
520 final String[] codes = {"hello,\r\n\r\nworld,\r\n", "hello,\r\n\r\nworld,", "hello,\r\n\r\nworld,\"\"\r\n", "hello,\r\n\r\nworld,\"\"",
521 "hello,\r\n\r\nworld,\n", "hello,\r\n\r\nworld,", "hello,\r\n\r\nworld,\"\"\n", "hello,\r\n\r\nworld,\"\""};
522 final String[][] res = {{"hello", ""}, {""},
523 {"world", ""}};
524
525 for (final String code : codes) {
526 try (final CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL)) {
527 final List<CSVRecord> records = parser.getRecords();
528 assertEquals(res.length, records.size());
529 assertFalse(records.isEmpty());
530 for (int i = 0; i < res.length; i++) {
531 assertArrayEquals(res[i], records.get(i).values());
532 }
533 }
534 }
535 }
536
537 @Test
538 public void testExcelFormat1() throws IOException {
539 final String code = "value1,value2,value3,value4\r\na,b,c,d\r\n x,,," + "\r\n\r\n\"\"\"hello\"\"\",\" \"\"world\"\"\",\"abc\ndef\",\r\n";
540 final String[][] res = {{"value1", "value2", "value3", "value4"}, {"a", "b", "c", "d"}, {" x", "", "", ""}, {""},
541 {"\"hello\"", " \"world\"", "abc\ndef", ""}};
542 try (final CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL)) {
543 final List<CSVRecord> records = parser.getRecords();
544 assertEquals(res.length, records.size());
545 assertFalse(records.isEmpty());
546 for (int i = 0; i < res.length; i++) {
547 assertArrayEquals(res[i], records.get(i).values());
548 }
549 }
550 }
551
552 @Test
553 public void testExcelFormat2() throws Exception {
554 final String code = "foo,baar\r\n\r\nhello,\r\n\r\nworld,\r\n";
555 final String[][] res = {{"foo", "baar"}, {""}, {"hello", ""}, {""}, {"world", ""}};
556 try (final CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL)) {
557 final List<CSVRecord> records = parser.getRecords();
558 assertEquals(res.length, records.size());
559 assertFalse(records.isEmpty());
560 for (int i = 0; i < res.length; i++) {
561 assertArrayEquals(res[i], records.get(i).values());
562 }
563 }
564 }
565
566
567
568
569 @Test
570 public void testExcelHeaderCountLessThanData() throws Exception {
571 final String code = "A,B,C,,\r\na,b,c,d,e\r\n";
572 try (final CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL.withHeader())) {
573 parser.getRecords().forEach(record -> {
574 assertEquals("a", record.get("A"));
575 assertEquals("b", record.get("B"));
576 assertEquals("c", record.get("C"));
577 });
578 }
579 }
580
581 @Test
582 public void testFirstEndOfLineCr() throws IOException {
583 final String data = "foo\rbaar,\rhello,world\r,kanu";
584 try (final CSVParser parser = CSVParser.parse(data, CSVFormat.DEFAULT)) {
585 final List<CSVRecord> records = parser.getRecords();
586 assertEquals(4, records.size());
587 assertEquals("\r", parser.getFirstEndOfLine());
588 }
589 }
590
591 @Test
592 public void testFirstEndOfLineCrLf() throws IOException {
593 final String data = "foo\r\nbaar,\r\nhello,world\r\n,kanu";
594 try (final CSVParser parser = CSVParser.parse(data, CSVFormat.DEFAULT)) {
595 final List<CSVRecord> records = parser.getRecords();
596 assertEquals(4, records.size());
597 assertEquals("\r\n", parser.getFirstEndOfLine());
598 }
599 }
600
601 @Test
602 public void testFirstEndOfLineLf() throws IOException {
603 final String data = "foo\nbaar,\nhello,world\n,kanu";
604 try (final CSVParser parser = CSVParser.parse(data, CSVFormat.DEFAULT)) {
605 final List<CSVRecord> records = parser.getRecords();
606 assertEquals(4, records.size());
607 assertEquals("\n", parser.getFirstEndOfLine());
608 }
609 }
610
611 @Test
612 public void testForEach() throws Exception {
613 try (final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z"); final CSVParser parser = CSVFormat.DEFAULT.parse(in)) {
614 final List<CSVRecord> records = new ArrayList<>();
615 for (final CSVRecord record : parser) {
616 records.add(record);
617 }
618 assertEquals(3, records.size());
619 assertArrayEquals(new String[] {"a", "b", "c"}, records.get(0).values());
620 assertArrayEquals(new String[] {"1", "2", "3"}, records.get(1).values());
621 assertArrayEquals(new String[] {"x", "y", "z"}, records.get(2).values());
622 }
623 }
624
625 @Test
626 public void testGetHeaderComment_HeaderComment1() throws IOException {
627 try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_COMMENT, FORMAT_AUTO_HEADER)) {
628 parser.getRecords();
629
630 assertTrue(parser.hasHeaderComment());
631 assertEquals("header comment", parser.getHeaderComment());
632 }
633 }
634
635 @Test
636 public void testGetHeaderComment_HeaderComment2() throws IOException {
637 try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_COMMENT, FORMAT_EXPLICIT_HEADER)) {
638 parser.getRecords();
639
640 assertTrue(parser.hasHeaderComment());
641 assertEquals("header comment", parser.getHeaderComment());
642 }
643 }
644
645 @Test
646 public void testGetHeaderComment_HeaderComment3() throws IOException {
647 try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_COMMENT, FORMAT_EXPLICIT_HEADER_NOSKIP)) {
648 parser.getRecords();
649
650 assertFalse(parser.hasHeaderComment());
651 assertNull(parser.getHeaderComment());
652 }
653 }
654
655 @Test
656 public void testGetHeaderComment_HeaderTrailerComment() throws IOException {
657 try (CSVParser parser = CSVParser.parse(CSV_INPUT_MULTILINE_HEADER_TRAILER_COMMENT, FORMAT_AUTO_HEADER)) {
658 parser.getRecords();
659
660 assertTrue(parser.hasHeaderComment());
661 assertEquals("multi-line"+LF+"header comment", parser.getHeaderComment());
662 }
663 }
664
665 @Test
666 public void testGetHeaderComment_NoComment1() throws IOException {
667 try (CSVParser parser = CSVParser.parse(CSV_INPUT_NO_COMMENT, FORMAT_AUTO_HEADER)) {
668 parser.getRecords();
669
670 assertFalse(parser.hasHeaderComment());
671 assertNull(parser.getHeaderComment());
672 }
673 }
674
675 @Test
676 public void testGetHeaderComment_NoComment2() throws IOException {
677 try (CSVParser parser = CSVParser.parse(CSV_INPUT_NO_COMMENT, FORMAT_EXPLICIT_HEADER)) {
678 parser.getRecords();
679
680 assertFalse(parser.hasHeaderComment());
681 assertNull(parser.getHeaderComment());
682 }
683 }
684
685 @Test
686 public void testGetHeaderComment_NoComment3() throws IOException {
687 try (CSVParser parser = CSVParser.parse(CSV_INPUT_NO_COMMENT, FORMAT_EXPLICIT_HEADER_NOSKIP)) {
688 parser.getRecords();
689
690 assertFalse(parser.hasHeaderComment());
691 assertNull(parser.getHeaderComment());
692 }
693 }
694
695 @Test
696 public void testGetHeaderMap() throws Exception {
697 try (final CSVParser parser = CSVParser.parse("a,b,c\n1,2,3\nx,y,z", CSVFormat.DEFAULT.withHeader("A", "B", "C"))) {
698 final Map<String, Integer> headerMap = parser.getHeaderMap();
699 final Iterator<String> columnNames = headerMap.keySet().iterator();
700
701 assertEquals("A", columnNames.next());
702 assertEquals("B", columnNames.next());
703 assertEquals("C", columnNames.next());
704 final Iterator<CSVRecord> records = parser.iterator();
705
706
707 for (int i = 0; i < 3; i++) {
708 assertTrue(records.hasNext());
709 final CSVRecord record = records.next();
710 assertEquals(record.get(0), record.get("A"));
711 assertEquals(record.get(1), record.get("B"));
712 assertEquals(record.get(2), record.get("C"));
713 }
714
715 assertFalse(records.hasNext());
716 }
717 }
718
719 @Test
720 public void testGetHeaderNames() throws IOException {
721 try (final CSVParser parser = CSVParser.parse("a,b,c\n1,2,3\nx,y,z", CSVFormat.DEFAULT.withHeader("A", "B", "C"))) {
722 final Map<String, Integer> nameIndexMap = parser.getHeaderMap();
723 final List<String> headerNames = parser.getHeaderNames();
724 assertNotNull(headerNames);
725 assertEquals(nameIndexMap.size(), headerNames.size());
726 for (int i = 0; i < headerNames.size(); i++) {
727 final String name = headerNames.get(i);
728 assertEquals(i, nameIndexMap.get(name).intValue());
729 }
730 }
731 }
732
733 @Test
734 public void testGetHeaderNamesReadOnly() throws IOException {
735 try (final CSVParser parser = CSVParser.parse("a,b,c\n1,2,3\nx,y,z", CSVFormat.DEFAULT.withHeader("A", "B", "C"))) {
736 final List<String> headerNames = parser.getHeaderNames();
737 assertNotNull(headerNames);
738 assertThrows(UnsupportedOperationException.class, () -> headerNames.add("This is a read-only list."));
739 }
740 }
741
742 @Test
743 public void testGetLine() throws IOException {
744 try (final CSVParser parser = CSVParser.parse(CSV_INPUT, CSVFormat.DEFAULT.withIgnoreSurroundingSpaces())) {
745 for (final String[] re : RESULT) {
746 assertArrayEquals(re, parser.nextRecord().values());
747 }
748
749 assertNull(parser.nextRecord());
750 }
751 }
752
753 @Test
754 public void testGetLineNumberWithCR() throws Exception {
755 this.validateLineNumbers(String.valueOf(CR));
756 }
757
758 @Test
759 public void testGetLineNumberWithCRLF() throws Exception {
760 this.validateLineNumbers(CRLF);
761 }
762
763 @Test
764 public void testGetLineNumberWithLF() throws Exception {
765 this.validateLineNumbers(String.valueOf(LF));
766 }
767
768 @Test
769 public void testGetOneLine() throws IOException {
770 try (final CSVParser parser = CSVParser.parse(CSV_INPUT_1, CSVFormat.DEFAULT)) {
771 final CSVRecord record = parser.getRecords().get(0);
772 assertArrayEquals(RESULT[0], record.values());
773 }
774 }
775
776
777
778
779
780
781 @Test
782 public void testGetOneLineOneParser() throws IOException {
783 final CSVFormat format = CSVFormat.DEFAULT;
784 try (final PipedWriter writer = new PipedWriter(); final CSVParser parser = new CSVParser(new PipedReader(writer), format)) {
785 writer.append(CSV_INPUT_1);
786 writer.append(format.getRecordSeparator());
787 final CSVRecord record1 = parser.nextRecord();
788 assertArrayEquals(RESULT[0], record1.values());
789 writer.append(CSV_INPUT_2);
790 writer.append(format.getRecordSeparator());
791 final CSVRecord record2 = parser.nextRecord();
792 assertArrayEquals(RESULT[1], record2.values());
793 }
794 }
795
796 @Test
797 public void testGetRecordNumberWithCR() throws Exception {
798 this.validateRecordNumbers(String.valueOf(CR));
799 }
800
801 @Test
802 public void testGetRecordNumberWithCRLF() throws Exception {
803 this.validateRecordNumbers(CRLF);
804 }
805
806 @Test
807 public void testGetRecordNumberWithLF() throws Exception {
808 this.validateRecordNumbers(String.valueOf(LF));
809 }
810
811 @Test
812 public void testGetRecordPositionWithCRLF() throws Exception {
813 this.validateRecordPosition(CRLF);
814 }
815
816 @Test
817 public void testGetRecordPositionWithLF() throws Exception {
818 this.validateRecordPosition(String.valueOf(LF));
819 }
820
821 @Test
822 public void testGetRecords() throws IOException {
823 try (final CSVParser parser = CSVParser.parse(CSV_INPUT, CSVFormat.DEFAULT.withIgnoreSurroundingSpaces())) {
824 final List<CSVRecord> records = parser.getRecords();
825 assertEquals(RESULT.length, records.size());
826 assertFalse(records.isEmpty());
827 for (int i = 0; i < RESULT.length; i++) {
828 assertArrayEquals(RESULT[i], records.get(i).values());
829 }
830 }
831 }
832
833 @Test
834 public void testGetRecordsFromBrokenInputStream() throws IOException {
835 @SuppressWarnings("resource")
836 final CSVParser parser = CSVParser.parse(new BrokenInputStream(), UTF_8, CSVFormat.DEFAULT);
837 assertThrows(UncheckedIOException.class, parser::getRecords);
838
839 }
840
841 @Test
842 public void testGetRecordWithMultiLineValues() throws Exception {
843 try (final CSVParser parser = CSVParser.parse("\"a\r\n1\",\"a\r\n2\"" + CRLF + "\"b\r\n1\",\"b\r\n2\"" + CRLF + "\"c\r\n1\",\"c\r\n2\"",
844 CSVFormat.DEFAULT.withRecordSeparator(CRLF))) {
845 CSVRecord record;
846 assertEquals(0, parser.getRecordNumber());
847 assertEquals(0, parser.getCurrentLineNumber());
848 assertNotNull(record = parser.nextRecord());
849 assertEquals(3, parser.getCurrentLineNumber());
850 assertEquals(1, record.getRecordNumber());
851 assertEquals(1, parser.getRecordNumber());
852 assertNotNull(record = parser.nextRecord());
853 assertEquals(6, parser.getCurrentLineNumber());
854 assertEquals(2, record.getRecordNumber());
855 assertEquals(2, parser.getRecordNumber());
856 assertNotNull(record = parser.nextRecord());
857 assertEquals(9, parser.getCurrentLineNumber());
858 assertEquals(3, record.getRecordNumber());
859 assertEquals(3, parser.getRecordNumber());
860 assertNull(record = parser.nextRecord());
861 assertEquals(9, parser.getCurrentLineNumber());
862 assertEquals(3, parser.getRecordNumber());
863 }
864 }
865
866 @Test
867 public void testGetTrailerComment_HeaderComment1() throws IOException {
868 try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_COMMENT, FORMAT_AUTO_HEADER)) {
869 parser.getRecords();
870 assertFalse(parser.hasTrailerComment());
871 assertNull(parser.getTrailerComment());
872 }
873 }
874
875 @Test
876 public void testGetTrailerComment_HeaderComment2() throws IOException {
877 try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_COMMENT, FORMAT_EXPLICIT_HEADER)) {
878 parser.getRecords();
879 assertFalse(parser.hasTrailerComment());
880 assertNull(parser.getTrailerComment());
881 }
882 }
883
884 @Test
885 public void testGetTrailerComment_HeaderComment3() throws IOException {
886 try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_COMMENT, FORMAT_EXPLICIT_HEADER_NOSKIP)) {
887 parser.getRecords();
888 assertFalse(parser.hasTrailerComment());
889 assertNull(parser.getTrailerComment());
890 }
891 }
892
893 @Test
894 public void testGetTrailerComment_HeaderTrailerComment1() throws IOException {
895 try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_TRAILER_COMMENT, FORMAT_AUTO_HEADER)) {
896 parser.getRecords();
897 assertTrue(parser.hasTrailerComment());
898 assertEquals("comment", parser.getTrailerComment());
899 }
900 }
901
902 @Test
903 public void testGetTrailerComment_HeaderTrailerComment2() throws IOException {
904 try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_TRAILER_COMMENT, FORMAT_EXPLICIT_HEADER)) {
905 parser.getRecords();
906 assertTrue(parser.hasTrailerComment());
907 assertEquals("comment", parser.getTrailerComment());
908 }
909 }
910
911 @Test
912 public void testGetTrailerComment_HeaderTrailerComment3() throws IOException {
913 try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_TRAILER_COMMENT, FORMAT_EXPLICIT_HEADER_NOSKIP)) {
914 parser.getRecords();
915 assertTrue(parser.hasTrailerComment());
916 assertEquals("comment", parser.getTrailerComment());
917 }
918 }
919
920 @Test
921 public void testGetTrailerComment_MultilineComment() throws IOException {
922 try (CSVParser parser = CSVParser.parse(CSV_INPUT_MULTILINE_HEADER_TRAILER_COMMENT, FORMAT_AUTO_HEADER)) {
923 parser.getRecords();
924 assertTrue(parser.hasTrailerComment());
925 assertEquals("multi-line"+LF+"comment", parser.getTrailerComment());
926 }
927 }
928
929 @Test
930 public void testHeader() throws Exception {
931 final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z");
932
933 try (final CSVParser parser = CSVFormat.DEFAULT.withHeader().parse(in)) {
934 final Iterator<CSVRecord> records = parser.iterator();
935
936 for (int i = 0; i < 2; i++) {
937 assertTrue(records.hasNext());
938 final CSVRecord record = records.next();
939 assertEquals(record.get(0), record.get("a"));
940 assertEquals(record.get(1), record.get("b"));
941 assertEquals(record.get(2), record.get("c"));
942 }
943
944 assertFalse(records.hasNext());
945 }
946 }
947
948 @Test
949 public void testHeaderComment() throws Exception {
950 final Reader in = new StringReader("# comment\na,b,c\n1,2,3\nx,y,z");
951
952 try (final CSVParser parser = CSVFormat.DEFAULT.withCommentMarker('#').withHeader().parse(in)) {
953 final Iterator<CSVRecord> records = parser.iterator();
954
955 for (int i = 0; i < 2; i++) {
956 assertTrue(records.hasNext());
957 final CSVRecord record = records.next();
958 assertEquals(record.get(0), record.get("a"));
959 assertEquals(record.get(1), record.get("b"));
960 assertEquals(record.get(2), record.get("c"));
961 }
962
963 assertFalse(records.hasNext());
964 }
965 }
966
967 @Test
968 public void testHeaderMissing() throws Exception {
969 final Reader in = new StringReader("a,,c\n1,2,3\nx,y,z");
970
971 try (final CSVParser parser = CSVFormat.DEFAULT.withHeader().withAllowMissingColumnNames().parse(in)) {
972 final Iterator<CSVRecord> records = parser.iterator();
973
974 for (int i = 0; i < 2; i++) {
975 assertTrue(records.hasNext());
976 final CSVRecord record = records.next();
977 assertEquals(record.get(0), record.get("a"));
978 assertEquals(record.get(2), record.get("c"));
979 }
980
981 assertFalse(records.hasNext());
982 }
983 }
984
985 @Test
986 public void testHeaderMissingWithNull() throws Exception {
987 final Reader in = new StringReader("a,,c,,e\n1,2,3,4,5\nv,w,x,y,z");
988 try (final CSVParser parser = CSVFormat.DEFAULT.withHeader().withNullString("").withAllowMissingColumnNames().parse(in)) {
989 parser.iterator();
990 }
991 }
992
993 @Test
994 public void testHeadersMissing() throws Exception {
995 try (final Reader in = new StringReader("a,,c,,e\n1,2,3,4,5\nv,w,x,y,z");
996 final CSVParser parser = CSVFormat.DEFAULT.withHeader().withAllowMissingColumnNames().parse(in)) {
997 parser.iterator();
998 }
999 }
1000
1001 @Test
1002 public void testHeadersMissingException() {
1003 final Reader in = new StringReader("a,,c,,e\n1,2,3,4,5\nv,w,x,y,z");
1004 assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withHeader().parse(in).iterator());
1005 }
1006
1007 @Test
1008 public void testHeadersMissingOneColumnException() {
1009 final Reader in = new StringReader("a,,c,d,e\n1,2,3,4,5\nv,w,x,y,z");
1010 assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withHeader().parse(in).iterator());
1011 }
1012
1013 @Test
1014 public void testHeadersWithNullColumnName() throws IOException {
1015 final Reader in = new StringReader("header1,null,header3\n1,2,3\n4,5,6");
1016 try (final CSVParser parser = CSVFormat.DEFAULT.withHeader().withNullString("null").withAllowMissingColumnNames().parse(in)) {
1017 final Iterator<CSVRecord> records = parser.iterator();
1018 final CSVRecord record = records.next();
1019
1020 @SuppressWarnings("resource")
1021 final CSVParser recordParser = record.getParser();
1022 assertEquals(Arrays.asList("header1", "header3"), recordParser.getHeaderNames());
1023 assertEquals(2, recordParser.getHeaderMap().size());
1024 }
1025 }
1026
1027 @Test
1028 public void testIgnoreCaseHeaderMapping() throws Exception {
1029 final Reader reader = new StringReader("1,2,3");
1030 try (final CSVParser parser = CSVFormat.DEFAULT.withHeader("One", "TWO", "three").withIgnoreHeaderCase().parse(reader)) {
1031 final Iterator<CSVRecord> records = parser.iterator();
1032 final CSVRecord record = records.next();
1033 assertEquals("1", record.get("one"));
1034 assertEquals("2", record.get("two"));
1035 assertEquals("3", record.get("THREE"));
1036 }}
1037
1038 @Test
1039 public void testIgnoreEmptyLines() throws IOException {
1040 final String code = "\nfoo,baar\n\r\n,\n\n,world\r\n\n";
1041
1042
1043 try (final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT)) {
1044 final List<CSVRecord> records = parser.getRecords();
1045 assertEquals(3, records.size());
1046 }
1047 }
1048
1049 @Test
1050 public void testInvalidFormat() {
1051 assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withDelimiter(CR));
1052 }
1053
1054 @Test
1055 public void testIterator() throws Exception {
1056 final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z");
1057
1058 try (final CSVParser parser = CSVFormat.DEFAULT.parse(in)) {
1059 final Iterator<CSVRecord> iterator = parser.iterator();
1060
1061 assertTrue(iterator.hasNext());
1062 assertThrows(UnsupportedOperationException.class, iterator::remove);
1063 assertArrayEquals(new String[] { "a", "b", "c" }, iterator.next().values());
1064 assertArrayEquals(new String[] { "1", "2", "3" }, iterator.next().values());
1065 assertTrue(iterator.hasNext());
1066 assertTrue(iterator.hasNext());
1067 assertTrue(iterator.hasNext());
1068 assertArrayEquals(new String[] { "x", "y", "z" }, iterator.next().values());
1069 assertFalse(iterator.hasNext());
1070
1071 assertThrows(NoSuchElementException.class, iterator::next);
1072 }}
1073
1074 @Test
1075 public void testIteratorSequenceBreaking() throws IOException {
1076 final String fiveRows = "1\n2\n3\n4\n5\n";
1077
1078
1079 try (CSVParser parser = CSVFormat.DEFAULT.parse(new StringReader(fiveRows))) {
1080
1081 final Iterator<CSVRecord> iter = parser.iterator();
1082 int recordNumber = 0;
1083 while (iter.hasNext()) {
1084 final CSVRecord record = iter.next();
1085 recordNumber++;
1086 assertEquals(String.valueOf(recordNumber), record.get(0));
1087 if (recordNumber >= 2) {
1088 break;
1089 }
1090 }
1091 iter.hasNext();
1092 while (iter.hasNext()) {
1093 final CSVRecord record = iter.next();
1094 recordNumber++;
1095 assertEquals(String.valueOf(recordNumber), record.get(0));
1096 }
1097 }
1098
1099
1100 try (CSVParser parser = CSVFormat.DEFAULT.parse(new StringReader(fiveRows))) {
1101 int recordNumber = 0;
1102 for (final CSVRecord record : parser) {
1103 recordNumber++;
1104 assertEquals(String.valueOf(recordNumber), record.get(0));
1105 if (recordNumber >= 2) {
1106 break;
1107 }
1108 }
1109 for (final CSVRecord record : parser) {
1110 recordNumber++;
1111 assertEquals(String.valueOf(recordNumber), record.get(0));
1112 }
1113 }
1114
1115
1116 try (CSVParser parser = CSVFormat.DEFAULT.parse(new StringReader(fiveRows))) {
1117 int recordNumber = 0;
1118 for (final CSVRecord record : parser) {
1119 recordNumber++;
1120 assertEquals(String.valueOf(recordNumber), record.get(0));
1121 if (recordNumber >= 2) {
1122 break;
1123 }
1124 }
1125 parser.iterator().hasNext();
1126 for (final CSVRecord record : parser) {
1127 recordNumber++;
1128 assertEquals(String.valueOf(recordNumber), record.get(0));
1129 }
1130 }
1131 }
1132
1133 @Test
1134 public void testLineFeedEndings() throws IOException {
1135 final String code = "foo\nbaar,\nhello,world\n,kanu";
1136 try (final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT)) {
1137 final List<CSVRecord> records = parser.getRecords();
1138 assertEquals(4, records.size());
1139 }
1140 }
1141
1142 @Test
1143 public void testMappedButNotSetAsOutlook2007ContactExport() throws Exception {
1144 final Reader in = new StringReader("a,b,c\n1,2\nx,y,z");
1145 try (final CSVParser parser = CSVFormat.DEFAULT.withHeader("A", "B", "C").withSkipHeaderRecord().parse(in)) {
1146 final Iterator<CSVRecord> records = parser.iterator();
1147 CSVRecord record;
1148
1149
1150 record = records.next();
1151 assertTrue(record.isMapped("A"));
1152 assertTrue(record.isMapped("B"));
1153 assertTrue(record.isMapped("C"));
1154 assertTrue(record.isSet("A"));
1155 assertTrue(record.isSet("B"));
1156 assertFalse(record.isSet("C"));
1157 assertEquals("1", record.get("A"));
1158 assertEquals("2", record.get("B"));
1159 assertFalse(record.isConsistent());
1160
1161
1162 record = records.next();
1163 assertTrue(record.isMapped("A"));
1164 assertTrue(record.isMapped("B"));
1165 assertTrue(record.isMapped("C"));
1166 assertTrue(record.isSet("A"));
1167 assertTrue(record.isSet("B"));
1168 assertTrue(record.isSet("C"));
1169 assertEquals("x", record.get("A"));
1170 assertEquals("y", record.get("B"));
1171 assertEquals("z", record.get("C"));
1172 assertTrue(record.isConsistent());
1173
1174 assertFalse(records.hasNext());
1175 }
1176 }
1177
1178 @Test
1179 @Disabled
1180 public void testMongoDbCsv() throws Exception {
1181 try (final CSVParser parser = CSVParser.parse("\"a a\",b,c" + LF + "d,e,f", CSVFormat.MONGODB_CSV)) {
1182 final Iterator<CSVRecord> itr1 = parser.iterator();
1183 final Iterator<CSVRecord> itr2 = parser.iterator();
1184
1185 final CSVRecord first = itr1.next();
1186 assertEquals("a a", first.get(0));
1187 assertEquals("b", first.get(1));
1188 assertEquals("c", first.get(2));
1189
1190 final CSVRecord second = itr2.next();
1191 assertEquals("d", second.get(0));
1192 assertEquals("e", second.get(1));
1193 assertEquals("f", second.get(2));
1194 }
1195 }
1196
1197 @Test
1198
1199 public void testMultipleIterators() throws Exception {
1200 try (final CSVParser parser = CSVParser.parse("a,b,c" + CRLF + "d,e,f", CSVFormat.DEFAULT)) {
1201 final Iterator<CSVRecord> itr1 = parser.iterator();
1202
1203 final CSVRecord first = itr1.next();
1204 assertEquals("a", first.get(0));
1205 assertEquals("b", first.get(1));
1206 assertEquals("c", first.get(2));
1207
1208 final CSVRecord second = itr1.next();
1209 assertEquals("d", second.get(0));
1210 assertEquals("e", second.get(1));
1211 assertEquals("f", second.get(2));
1212 }
1213 }
1214
1215 @Test
1216 public void testNewCSVParserNullReaderFormat() {
1217 assertThrows(NullPointerException.class, () -> new CSVParser(null, CSVFormat.DEFAULT));
1218 }
1219
1220 @Test
1221 public void testNewCSVParserReaderNullFormat() {
1222 assertThrows(NullPointerException.class, () -> new CSVParser(new StringReader(""), null));
1223 }
1224
1225 @Test
1226 public void testNoHeaderMap() throws Exception {
1227 try (final CSVParser parser = CSVParser.parse("a,b,c\n1,2,3\nx,y,z", CSVFormat.DEFAULT)) {
1228 assertNull(parser.getHeaderMap());
1229 }
1230 }
1231
1232 @Test
1233 public void testNotValueCSV() throws IOException {
1234 final String source = "#";
1235 final CSVFormat csvFormat = CSVFormat.DEFAULT.withCommentMarker('#');
1236 try (final CSVParser csvParser = csvFormat.parse(new StringReader(source))) {
1237 final CSVRecord csvRecord = csvParser.nextRecord();
1238 assertNull(csvRecord);
1239 }
1240 }
1241
1242 @Test
1243 public void testParse() throws Exception {
1244 final ClassLoader loader = ClassLoader.getSystemClassLoader();
1245 final URL url = loader.getResource("org/apache/commons/csv/CSVFileParser/test.csv");
1246 final CSVFormat format = CSVFormat.DEFAULT.withHeader("A", "B", "C", "D");
1247 final Charset charset = StandardCharsets.UTF_8;
1248
1249 try (@SuppressWarnings("resource")
1250 final CSVParser parser = CSVParser.parse(new InputStreamReader(url.openStream(), charset), format)) {
1251 parseFully(parser);
1252 }
1253 try (final CSVParser parser = CSVParser.parse(new String(Files.readAllBytes(Paths.get(url.toURI())), charset), format)) {
1254 parseFully(parser);
1255 }
1256 try (final CSVParser parser = CSVParser.parse(new File(url.toURI()), charset, format)) {
1257 parseFully(parser);
1258 }
1259 try (@SuppressWarnings("resource")
1260 final CSVParser parser = CSVParser.parse(url.openStream(), charset, format)) {
1261 parseFully(parser);
1262 }
1263 try (final CSVParser parser = CSVParser.parse(Paths.get(url.toURI()), charset, format)) {
1264 parseFully(parser);
1265 }
1266 try (final CSVParser parser = CSVParser.parse(url, charset, format)) {
1267 parseFully(parser);
1268 }
1269 try (final CSVParser parser = new CSVParser(new InputStreamReader(url.openStream(), charset), format)) {
1270 parseFully(parser);
1271 }
1272 try (final CSVParser parser = new CSVParser(new InputStreamReader(url.openStream(), charset), format, 0, 1)) {
1273 parseFully(parser);
1274 }
1275 }
1276
1277 @Test
1278 public void testParseFileNullFormat() {
1279 assertThrows(NullPointerException.class, () -> CSVParser.parse(new File("CSVFileParser/test.csv"), Charset.defaultCharset(), null));
1280 }
1281
1282 @Test
1283 public void testParseNullFileFormat() {
1284 assertThrows(NullPointerException.class, () -> CSVParser.parse((File) null, Charset.defaultCharset(), CSVFormat.DEFAULT));
1285 }
1286
1287 @Test
1288 public void testParseNullPathFormat() {
1289 assertThrows(NullPointerException.class, () -> CSVParser.parse((Path) null, Charset.defaultCharset(), CSVFormat.DEFAULT));
1290 }
1291
1292 @Test
1293 public void testParseNullStringFormat() {
1294 assertThrows(NullPointerException.class, () -> CSVParser.parse((String) null, CSVFormat.DEFAULT));
1295 }
1296
1297 @Test
1298 public void testParseNullUrlCharsetFormat() {
1299 assertThrows(NullPointerException.class, () -> CSVParser.parse((URL) null, Charset.defaultCharset(), CSVFormat.DEFAULT));
1300 }
1301
1302 @Test
1303 public void testParserUrlNullCharsetFormat() {
1304 assertThrows(NullPointerException.class, () -> CSVParser.parse(new URL("https://commons.apache.org"), null, CSVFormat.DEFAULT));
1305 }
1306
1307 @Test
1308 public void testParseStringNullFormat() {
1309 assertThrows(NullPointerException.class, () -> CSVParser.parse("csv data", (CSVFormat) null));
1310 }
1311
1312 @Test
1313 public void testParseUrlCharsetNullFormat() {
1314 assertThrows(NullPointerException.class, () -> CSVParser.parse(new URL("https://commons.apache.org"), Charset.defaultCharset(), null));
1315 }
1316
1317 @Test
1318 public void testParseWithDelimiterStringWithEscape() throws IOException {
1319 final String source = "a![!|!]b![|]c[|]xyz\r\nabc[abc][|]xyz";
1320 final CSVFormat csvFormat = CSVFormat.DEFAULT.builder().setDelimiter("[|]").setEscape('!').build();
1321 try (CSVParser csvParser = csvFormat.parse(new StringReader(source))) {
1322 CSVRecord csvRecord = csvParser.nextRecord();
1323 assertEquals("a[|]b![|]c", csvRecord.get(0));
1324 assertEquals("xyz", csvRecord.get(1));
1325 csvRecord = csvParser.nextRecord();
1326 assertEquals("abc[abc]", csvRecord.get(0));
1327 assertEquals("xyz", csvRecord.get(1));
1328 }
1329 }
1330 @Test
1331 public void testParseWithDelimiterStringWithQuote() throws IOException {
1332 final String source = "'a[|]b[|]c'[|]xyz\r\nabc[abc][|]xyz";
1333 final CSVFormat csvFormat = CSVFormat.DEFAULT.builder().setDelimiter("[|]").setQuote('\'').build();
1334 try (CSVParser csvParser = csvFormat.parse(new StringReader(source))) {
1335 CSVRecord csvRecord = csvParser.nextRecord();
1336 assertEquals("a[|]b[|]c", csvRecord.get(0));
1337 assertEquals("xyz", csvRecord.get(1));
1338 csvRecord = csvParser.nextRecord();
1339 assertEquals("abc[abc]", csvRecord.get(0));
1340 assertEquals("xyz", csvRecord.get(1));
1341 }
1342 }
1343 @Test
1344 public void testParseWithDelimiterWithEscape() throws IOException {
1345 final String source = "a!,b!,c,xyz";
1346 final CSVFormat csvFormat = CSVFormat.DEFAULT.withEscape('!');
1347 try (CSVParser csvParser = csvFormat.parse(new StringReader(source))) {
1348 final CSVRecord csvRecord = csvParser.nextRecord();
1349 assertEquals("a,b,c", csvRecord.get(0));
1350 assertEquals("xyz", csvRecord.get(1));
1351 }
1352 }
1353 @Test
1354 public void testParseWithDelimiterWithQuote() throws IOException {
1355 final String source = "'a,b,c',xyz";
1356 final CSVFormat csvFormat = CSVFormat.DEFAULT.withQuote('\'');
1357 try (CSVParser csvParser = csvFormat.parse(new StringReader(source))) {
1358 final CSVRecord csvRecord = csvParser.nextRecord();
1359 assertEquals("a,b,c", csvRecord.get(0));
1360 assertEquals("xyz", csvRecord.get(1));
1361 }
1362 }
1363 @Test
1364 public void testParseWithQuoteThrowsException() {
1365 final CSVFormat csvFormat = CSVFormat.DEFAULT.withQuote('\'');
1366 assertThrows(IOException.class, () -> csvFormat.parse(new StringReader("'a,b,c','")).nextRecord());
1367 assertThrows(IOException.class, () -> csvFormat.parse(new StringReader("'a,b,c'abc,xyz")).nextRecord());
1368 assertThrows(IOException.class, () -> csvFormat.parse(new StringReader("'abc'a,b,c',xyz")).nextRecord());
1369 }
1370 @Test
1371 public void testParseWithQuoteWithEscape() throws IOException {
1372 final String source = "'a?,b?,c?d',xyz";
1373 final CSVFormat csvFormat = CSVFormat.DEFAULT.withQuote('\'').withEscape('?');
1374 try (CSVParser csvParser = csvFormat.parse(new StringReader(source))) {
1375 final CSVRecord csvRecord = csvParser.nextRecord();
1376 assertEquals("a,b,c?d", csvRecord.get(0));
1377 assertEquals("xyz", csvRecord.get(1));
1378 }
1379 }
1380 @Test
1381 public void testProvidedHeader() throws Exception {
1382 final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z");
1383
1384 try (final CSVParser parser = CSVFormat.DEFAULT.withHeader("A", "B", "C").parse(in)) {
1385 final Iterator<CSVRecord> records = parser.iterator();
1386
1387 for (int i = 0; i < 3; i++) {
1388 assertTrue(records.hasNext());
1389 final CSVRecord record = records.next();
1390 assertTrue(record.isMapped("A"));
1391 assertTrue(record.isMapped("B"));
1392 assertTrue(record.isMapped("C"));
1393 assertFalse(record.isMapped("NOT MAPPED"));
1394 assertEquals(record.get(0), record.get("A"));
1395 assertEquals(record.get(1), record.get("B"));
1396 assertEquals(record.get(2), record.get("C"));
1397 }
1398
1399 assertFalse(records.hasNext());
1400 }
1401 }
1402
1403 @Test
1404 public void testProvidedHeaderAuto() throws Exception {
1405 final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z");
1406
1407 try (final CSVParser parser = CSVFormat.DEFAULT.withHeader().parse(in)) {
1408 final Iterator<CSVRecord> records = parser.iterator();
1409
1410 for (int i = 0; i < 2; i++) {
1411 assertTrue(records.hasNext());
1412 final CSVRecord record = records.next();
1413 assertTrue(record.isMapped("a"));
1414 assertTrue(record.isMapped("b"));
1415 assertTrue(record.isMapped("c"));
1416 assertFalse(record.isMapped("NOT MAPPED"));
1417 assertEquals(record.get(0), record.get("a"));
1418 assertEquals(record.get(1), record.get("b"));
1419 assertEquals(record.get(2), record.get("c"));
1420 }
1421
1422 assertFalse(records.hasNext());
1423 }
1424 }
1425
1426 @Test
1427 public void testRepeatedHeadersAreReturnedInCSVRecordHeaderNames() throws IOException {
1428 final Reader in = new StringReader("header1,header2,header1\n1,2,3\n4,5,6");
1429 try (final CSVParser parser = CSVFormat.DEFAULT.withFirstRecordAsHeader().withTrim().parse(in)) {
1430 final Iterator<CSVRecord> records = parser.iterator();
1431 final CSVRecord record = records.next();
1432 @SuppressWarnings("resource")
1433 final CSVParser recordParser = record.getParser();
1434 assertEquals(Arrays.asList("header1", "header2", "header1"), recordParser.getHeaderNames());
1435 }}
1436
1437 @Test
1438 public void testRoundtrip() throws Exception {
1439 final StringWriter out = new StringWriter();
1440 final String data = "a,b,c\r\n1,2,3\r\nx,y,z\r\n";
1441 try (final CSVPrinter printer = new CSVPrinter(out, CSVFormat.DEFAULT);
1442 final CSVParser parse = CSVParser.parse(data, CSVFormat.DEFAULT)) {
1443 for (final CSVRecord record : parse) {
1444 printer.printRecord(record);
1445 }
1446 assertEquals(data, out.toString());
1447 }
1448 }
1449
1450 @Test
1451 public void testSkipAutoHeader() throws Exception {
1452 final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z");
1453 try (final CSVParser parser = CSVFormat.DEFAULT.withHeader().parse(in)) {
1454 final Iterator<CSVRecord> records = parser.iterator();
1455 final CSVRecord record = records.next();
1456 assertEquals("1", record.get("a"));
1457 assertEquals("2", record.get("b"));
1458 assertEquals("3", record.get("c"));
1459 }
1460 }
1461
1462 @Test
1463 public void testSkipHeaderOverrideDuplicateHeaders() throws Exception {
1464 final Reader in = new StringReader("a,a,a\n1,2,3\nx,y,z");
1465 try (final CSVParser parser = CSVFormat.DEFAULT.withHeader("X", "Y", "Z").withSkipHeaderRecord().parse(in)) {
1466 final Iterator<CSVRecord> records = parser.iterator();
1467 final CSVRecord record = records.next();
1468 assertEquals("1", record.get("X"));
1469 assertEquals("2", record.get("Y"));
1470 assertEquals("3", record.get("Z"));
1471 }}
1472
1473 @Test
1474 public void testSkipSetAltHeaders() throws Exception {
1475 final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z");
1476 try (final CSVParser parser = CSVFormat.DEFAULT.withHeader("X", "Y", "Z").withSkipHeaderRecord().parse(in)) {
1477 final Iterator<CSVRecord> records = parser.iterator();
1478 final CSVRecord record = records.next();
1479 assertEquals("1", record.get("X"));
1480 assertEquals("2", record.get("Y"));
1481 assertEquals("3", record.get("Z"));
1482 }
1483 }
1484
1485 @Test
1486 public void testSkipSetHeader() throws Exception {
1487 final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z");
1488 try (final CSVParser parser = CSVFormat.DEFAULT.withHeader("a", "b", "c").withSkipHeaderRecord().parse(in)) {
1489 final Iterator<CSVRecord> records = parser.iterator();
1490 final CSVRecord record = records.next();
1491 assertEquals("1", record.get("a"));
1492 assertEquals("2", record.get("b"));
1493 assertEquals("3", record.get("c"));
1494 }
1495 }
1496
1497 @Test
1498 @Disabled
1499 public void testStartWithEmptyLinesThenHeaders() throws Exception {
1500 final String[] codes = {"\r\n\r\n\r\nhello,\r\n\r\n\r\n", "hello,\n\n\n", "hello,\"\"\r\n\r\n\r\n", "hello,\"\"\n\n\n"};
1501 final String[][] res = {{"hello", ""}, {""},
1502 {""}};
1503 for (final String code : codes) {
1504 try (final CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL)) {
1505 final List<CSVRecord> records = parser.getRecords();
1506 assertEquals(res.length, records.size());
1507 assertFalse(records.isEmpty());
1508 for (int i = 0; i < res.length; i++) {
1509 assertArrayEquals(res[i], records.get(i).values());
1510 }
1511 }
1512 }
1513 }
1514
1515 @Test
1516 public void testStream() throws Exception {
1517 final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z");
1518 try (final CSVParser parser = CSVFormat.DEFAULT.parse(in)) {
1519 final List<CSVRecord> list = parser.stream().collect(Collectors.toList());
1520 assertFalse(list.isEmpty());
1521 assertArrayEquals(new String[] { "a", "b", "c" }, list.get(0).values());
1522 assertArrayEquals(new String[] { "1", "2", "3" }, list.get(1).values());
1523 assertArrayEquals(new String[] { "x", "y", "z" }, list.get(2).values());
1524 }}
1525
1526 @Test
1527 public void testTrailingDelimiter() throws Exception {
1528 final Reader in = new StringReader("a,a,a,\n\"1\",\"2\",\"3\",\nx,y,z,");
1529 try (final CSVParser parser = CSVFormat.DEFAULT.withHeader("X", "Y", "Z").withSkipHeaderRecord().withTrailingDelimiter().parse(in)) {
1530 final Iterator<CSVRecord> records = parser.iterator();
1531 final CSVRecord record = records.next();
1532 assertEquals("1", record.get("X"));
1533 assertEquals("2", record.get("Y"));
1534 assertEquals("3", record.get("Z"));
1535 assertEquals(3, record.size());
1536 }
1537 }
1538
1539 @Test
1540 public void testTrim() throws Exception {
1541 final Reader in = new StringReader("a,a,a\n\" 1 \",\" 2 \",\" 3 \"\nx,y,z");
1542 try (final CSVParser parser = CSVFormat.DEFAULT.withHeader("X", "Y", "Z").withSkipHeaderRecord().withTrim().parse(in)) {
1543 final Iterator<CSVRecord> records = parser.iterator();
1544 final CSVRecord record = records.next();
1545 assertEquals("1", record.get("X"));
1546 assertEquals("2", record.get("Y"));
1547 assertEquals("3", record.get("Z"));
1548 assertEquals(3, record.size());
1549 }}
1550
1551 private void validateLineNumbers(final String lineSeparator) throws IOException {
1552 try (final CSVParser parser = CSVParser.parse("a" + lineSeparator + "b" + lineSeparator + "c", CSVFormat.DEFAULT.withRecordSeparator(lineSeparator))) {
1553 assertEquals(0, parser.getCurrentLineNumber());
1554 assertNotNull(parser.nextRecord());
1555 assertEquals(1, parser.getCurrentLineNumber());
1556 assertNotNull(parser.nextRecord());
1557 assertEquals(2, parser.getCurrentLineNumber());
1558 assertNotNull(parser.nextRecord());
1559
1560 assertEquals(3, parser.getCurrentLineNumber());
1561 assertNull(parser.nextRecord());
1562
1563 assertEquals(3, parser.getCurrentLineNumber());
1564 }
1565 }
1566
1567 private void validateRecordNumbers(final String lineSeparator) throws IOException {
1568 try (final CSVParser parser = CSVParser.parse("a" + lineSeparator + "b" + lineSeparator + "c", CSVFormat.DEFAULT.withRecordSeparator(lineSeparator))) {
1569 CSVRecord record;
1570 assertEquals(0, parser.getRecordNumber());
1571 assertNotNull(record = parser.nextRecord());
1572 assertEquals(1, record.getRecordNumber());
1573 assertEquals(1, parser.getRecordNumber());
1574 assertNotNull(record = parser.nextRecord());
1575 assertEquals(2, record.getRecordNumber());
1576 assertEquals(2, parser.getRecordNumber());
1577 assertNotNull(record = parser.nextRecord());
1578 assertEquals(3, record.getRecordNumber());
1579 assertEquals(3, parser.getRecordNumber());
1580 assertNull(record = parser.nextRecord());
1581 assertEquals(3, parser.getRecordNumber());
1582 }
1583 }
1584
1585 private void validateRecordPosition(final String lineSeparator) throws IOException {
1586 final String nl = lineSeparator;
1587
1588 final String code = "a,b,c" + lineSeparator + "1,2,3" + lineSeparator +
1589
1590 "'A" + nl + "A','B" + nl + "B',CC" + lineSeparator +
1591
1592
1593 "\u00c4,\u00d6,\u00dc" + lineSeparator + "EOF,EOF,EOF";
1594
1595 final CSVFormat format = CSVFormat.newFormat(',').withQuote('\'').withRecordSeparator(lineSeparator);
1596 CSVParser parser = CSVParser.parse(code, format);
1597
1598 CSVRecord record;
1599 assertEquals(0, parser.getRecordNumber());
1600
1601 assertNotNull(record = parser.nextRecord());
1602 assertEquals(1, record.getRecordNumber());
1603 assertEquals(code.indexOf('a'), record.getCharacterPosition());
1604
1605 assertNotNull(record = parser.nextRecord());
1606 assertEquals(2, record.getRecordNumber());
1607 assertEquals(code.indexOf('1'), record.getCharacterPosition());
1608
1609 assertNotNull(record = parser.nextRecord());
1610 final long positionRecord3 = record.getCharacterPosition();
1611 assertEquals(3, record.getRecordNumber());
1612 assertEquals(code.indexOf("'A"), record.getCharacterPosition());
1613 assertEquals("A" + lineSeparator + "A", record.get(0));
1614 assertEquals("B" + lineSeparator + "B", record.get(1));
1615 assertEquals("CC", record.get(2));
1616
1617 assertNotNull(record = parser.nextRecord());
1618 assertEquals(4, record.getRecordNumber());
1619 assertEquals(code.indexOf('\u00c4'), record.getCharacterPosition());
1620
1621 assertNotNull(record = parser.nextRecord());
1622 assertEquals(5, record.getRecordNumber());
1623 assertEquals(code.indexOf("EOF"), record.getCharacterPosition());
1624
1625 parser.close();
1626
1627
1628 parser = new CSVParser(new StringReader(code.substring((int) positionRecord3)), format, positionRecord3, 3);
1629
1630 assertNotNull(record = parser.nextRecord());
1631 assertEquals(3, record.getRecordNumber());
1632 assertEquals(code.indexOf("'A"), record.getCharacterPosition());
1633 assertEquals("A" + lineSeparator + "A", record.get(0));
1634 assertEquals("B" + lineSeparator + "B", record.get(1));
1635 assertEquals("CC", record.get(2));
1636
1637 assertNotNull(record = parser.nextRecord());
1638 assertEquals(4, record.getRecordNumber());
1639 assertEquals(code.indexOf('\u00c4'), record.getCharacterPosition());
1640 assertEquals("\u00c4", record.get(0));
1641
1642 parser.close();
1643 }
1644
1645 }