View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.csv;
19  
20  import static org.apache.commons.csv.Constants.CR;
21  import static org.apache.commons.csv.Constants.CRLF;
22  import static org.apache.commons.csv.Constants.LF;
23  import static org.junit.Assert.assertArrayEquals;
24  import static org.junit.Assert.assertEquals;
25  import static org.junit.Assert.assertFalse;
26  import static org.junit.Assert.assertNotNull;
27  import static org.junit.Assert.assertNull;
28  import static org.junit.Assert.assertTrue;
29  import static org.junit.Assert.fail;
30  
31  import java.io.File;
32  import java.io.IOException;
33  import java.io.InputStreamReader;
34  import java.io.PipedReader;
35  import java.io.PipedWriter;
36  import java.io.Reader;
37  import java.io.StringReader;
38  import java.io.StringWriter;
39  import java.net.URL;
40  import java.nio.charset.Charset;
41  import java.nio.charset.StandardCharsets;
42  import java.util.ArrayList;
43  import java.util.Iterator;
44  import java.util.List;
45  import java.util.Map;
46  import java.util.NoSuchElementException;
47  
48  import org.apache.commons.io.input.BOMInputStream;
49  import org.junit.Assert;
50  import org.junit.Ignore;
51  import org.junit.Test;
52  
53  /**
54   * CSVParserTest
55   *
56   * The tests are organized in three different sections: the 'setter/getter' section, the lexer section and finally the
57   * parser section. In case a test fails, you should follow a top-down approach for fixing a potential bug (it's likely
58   * that the parser itself fails if the lexer has problems...).
59   */
60  public class CSVParserTest {
61  
62      private static final Charset UTF_8 = StandardCharsets.UTF_8;
63  
64      private static final String UTF_8_NAME = UTF_8.name();
65  
66      private static final String CSV_INPUT = "a,b,c,d\n" + " a , b , 1 2 \n" + "\"foo baar\", b,\n"
67              // + " \"foo\n,,\n\"\",,\n\\\"\",d,e\n";
68              + "   \"foo\n,,\n\"\",,\n\"\"\",d,e\n"; // changed to use standard CSV escaping
69  
70      private static final String CSV_INPUT_1 = "a,b,c,d";
71  
72      private static final String CSV_INPUT_2 = "a,b,1 2";
73  
74      private static final String[][] RESULT = { { "a", "b", "c", "d" }, { "a", "b", "1 2" }, { "foo baar", "b", "" },
75              { "foo\n,,\n\",,\n\"", "d", "e" } };
76  
77      private BOMInputStream createBOMInputStream(String resource) throws IOException {
78          final URL url = ClassLoader.getSystemClassLoader().getResource(resource);
79          return new BOMInputStream(url.openStream());
80      }
81  
82      @Test
83      public void testBackslashEscaping() throws IOException {
84  
85          // To avoid confusion over the need for escaping chars in java code,
86          // We will test with a forward slash as the escape char, and a single
87          // quote as the encapsulator.
88  
89          final String code = "one,two,three\n" // 0
90          + "'',''\n" // 1) empty encapsulators
91                  + "/',/'\n" // 2) single encapsulators
92                  + "'/'','/''\n" // 3) single encapsulators encapsulated via escape
93                  + "'''',''''\n" // 4) single encapsulators encapsulated via doubling
94                  + "/,,/,\n" // 5) separator escaped
95                  + "//,//\n" // 6) escape escaped
96                  + "'//','//'\n" // 7) escape escaped in encapsulation
97                  + "   8   ,   \"quoted \"\" /\" // string\"   \n" // don't eat spaces
98                  + "9,   /\n   \n" // escaped newline
99                  + "";
100         final String[][] res = { { "one", "two", "three" }, // 0
101                 { "", "" }, // 1
102                 { "'", "'" }, // 2
103                 { "'", "'" }, // 3
104                 { "'", "'" }, // 4
105                 { ",", "," }, // 5
106                 { "/", "/" }, // 6
107                 { "/", "/" }, // 7
108                 { "   8   ", "   \"quoted \"\" /\" / string\"   " }, { "9", "   \n   " }, };
109 
110         final CSVFormat format = CSVFormat.newFormat(',').withQuote('\'').withRecordSeparator(CRLF).withEscape('/')
111                 .withIgnoreEmptyLines();
112 
113         try (final CSVParser parser = CSVParser.parse(code, format)) {
114             final List<CSVRecord> records = parser.getRecords();
115             assertTrue(records.size() > 0);
116 
117             Utils.compare("Records do not match expected result", res, records);
118         }
119     }
120 
121     @Test
122     public void testBackslashEscaping2() throws IOException {
123 
124         // To avoid confusion over the need for escaping chars in java code,
125         // We will test with a forward slash as the escape char, and a single
126         // quote as the encapsulator.
127 
128         final String code = "" + " , , \n" // 1)
129                 + " \t ,  , \n" // 2)
130                 + " // , /, , /,\n" // 3)
131                 + "";
132         final String[][] res = { { " ", " ", " " }, // 1
133                 { " \t ", "  ", " " }, // 2
134                 { " / ", " , ", " ," }, // 3
135         };
136 
137         final CSVFormat format = CSVFormat.newFormat(',').withRecordSeparator(CRLF).withEscape('/')
138                 .withIgnoreEmptyLines();
139 
140         try (final CSVParser parser = CSVParser.parse(code, format)) {
141             final List<CSVRecord> records = parser.getRecords();
142             assertTrue(records.size() > 0);
143 
144             Utils.compare("", res, records);
145         }
146     }
147 
148     @Test
149     @Ignore
150     public void testBackslashEscapingOld() throws IOException {
151         final String code = "one,two,three\n" + "on\\\"e,two\n" + "on\"e,two\n" + "one,\"tw\\\"o\"\n" +
152                 "one,\"t\\,wo\"\n" + "one,two,\"th,ree\"\n" + "\"a\\\\\"\n" + "a\\,b\n" + "\"a\\\\,b\"";
153         final String[][] res = { { "one", "two", "three" }, { "on\\\"e", "two" }, { "on\"e", "two" },
154                 { "one", "tw\"o" }, { "one", "t\\,wo" }, // backslash in quotes only escapes a delimiter (",")
155                 { "one", "two", "th,ree" }, { "a\\\\" }, // backslash in quotes only escapes a delimiter (",")
156                 { "a\\", "b" }, // a backslash must be returnd
157                 { "a\\\\,b" } // backslash in quotes only escapes a delimiter (",")
158         };
159         try (final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT)) {
160             final List<CSVRecord> records = parser.getRecords();
161             assertEquals(res.length, records.size());
162             assertTrue(records.size() > 0);
163             for (int i = 0; i < res.length; i++) {
164                 assertArrayEquals(res[i], records.get(i).values());
165             }
166         }
167     }
168 
169     @Test
170     @Ignore("CSV-107")
171     public void testBOM() throws IOException {
172         final URL url = ClassLoader.getSystemClassLoader().getResource("CSVFileParser/bom.csv");
173         try (final CSVParser parser = CSVParser.parse(url, Charset.forName(UTF_8_NAME), CSVFormat.EXCEL.withHeader())) {
174             for (final CSVRecord record : parser) {
175                 final String string = record.get("Date");
176                 Assert.assertNotNull(string);
177                 // System.out.println("date: " + record.get("Date"));
178             }
179         }
180     }
181 
182     @Test
183     public void testBOMInputStream_ParserWithReader() throws IOException {
184         try (final Reader reader = new InputStreamReader(createBOMInputStream("CSVFileParser/bom.csv"), UTF_8_NAME);
185                 final CSVParser parser = new CSVParser(reader, CSVFormat.EXCEL.withHeader())) {
186             for (final CSVRecord record : parser) {
187                 final String string = record.get("Date");
188                 Assert.assertNotNull(string);
189                 // System.out.println("date: " + record.get("Date"));
190             }
191         }
192     }
193 
194     @Test
195     public void testBOMInputStream_parseWithReader() throws IOException {
196         try (final Reader reader = new InputStreamReader(createBOMInputStream("CSVFileParser/bom.csv"), UTF_8_NAME);
197                 final CSVParser parser = CSVParser.parse(reader, CSVFormat.EXCEL.withHeader())) {
198             for (final CSVRecord record : parser) {
199                 final String string = record.get("Date");
200                 Assert.assertNotNull(string);
201                 // System.out.println("date: " + record.get("Date"));
202             }
203         }
204     }
205 
206     @Test
207     public void testBOMInputStream_ParserWithInputStream() throws IOException {
208         try (final BOMInputStream inputStream = createBOMInputStream("CSVFileParser/bom.csv");
209                 final CSVParser parser = CSVParser.parse(inputStream, UTF_8, CSVFormat.EXCEL.withHeader())) {
210             for (final CSVRecord record : parser) {
211                 final String string = record.get("Date");
212                 Assert.assertNotNull(string);
213                 // System.out.println("date: " + record.get("Date"));
214             }
215         }
216     }
217 
218     @Test
219     public void testCarriageReturnEndings() throws IOException {
220         final String code = "foo\rbaar,\rhello,world\r,kanu";
221         try (final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT)) {
222             final List<CSVRecord> records = parser.getRecords();
223             assertEquals(4, records.size());
224         }
225     }
226 
227     @Test
228     public void testCarriageReturnLineFeedEndings() throws IOException {
229         final String code = "foo\r\nbaar,\r\nhello,world\r\n,kanu";
230         try (final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT)) {
231             final List<CSVRecord> records = parser.getRecords();
232             assertEquals(4, records.size());
233         }
234     }
235 
236     @Test
237     public void testFirstEndOfLineCrLf() throws IOException {
238         final String data = "foo\r\nbaar,\r\nhello,world\r\n,kanu";
239         try (final CSVParser parser = CSVParser.parse(data, CSVFormat.DEFAULT)) {
240             final List<CSVRecord> records = parser.getRecords();
241             assertEquals(4, records.size());
242             assertEquals("\r\n", parser.getFirstEndOfLine());
243         }
244     }
245 
246     @Test
247     public void testFirstEndOfLineLf() throws IOException {
248         final String data = "foo\nbaar,\nhello,world\n,kanu";
249         try (final CSVParser parser = CSVParser.parse(data, CSVFormat.DEFAULT)) {
250             final List<CSVRecord> records = parser.getRecords();
251             assertEquals(4, records.size());
252             assertEquals("\n", parser.getFirstEndOfLine());
253         }
254     }
255 
256     @Test
257     public void testFirstEndOfLineCr() throws IOException {
258         final String data = "foo\rbaar,\rhello,world\r,kanu";
259         try (final CSVParser parser = CSVParser.parse(data, CSVFormat.DEFAULT)) {
260             final List<CSVRecord> records = parser.getRecords();
261             assertEquals(4, records.size());
262             assertEquals("\r", parser.getFirstEndOfLine());
263         }
264     }
265 
266     @Test(expected = NoSuchElementException.class)
267     public void testClose() throws Exception {
268         final Reader in = new StringReader("# comment\na,b,c\n1,2,3\nx,y,z");
269         final Iterator<CSVRecord> records;
270         try (final CSVParser parser = CSVFormat.DEFAULT.withCommentMarker('#').withHeader().parse(in)) {
271             records = parser.iterator();
272             assertTrue(records.hasNext());
273         }
274         assertFalse(records.hasNext());
275         records.next();
276     }
277 
278     @Test
279     public void testCSV57() throws Exception {
280         try (final CSVParser parser = CSVParser.parse("", CSVFormat.DEFAULT)) {
281             final List<CSVRecord> list = parser.getRecords();
282             assertNotNull(list);
283             assertEquals(0, list.size());
284         }
285     }
286 
287     @Test
288     public void testDefaultFormat() throws IOException {
289         final String code = "" + "a,b#\n" // 1)
290                 + "\"\n\",\" \",#\n" // 2)
291                 + "#,\"\"\n" // 3)
292                 + "# Final comment\n"// 4)
293                 ;
294         final String[][] res = { { "a", "b#" }, { "\n", " ", "#" }, { "#", "" }, { "# Final comment" } };
295 
296         CSVFormat format = CSVFormat.DEFAULT;
297         assertFalse(format.isCommentMarkerSet());
298         final String[][] res_comments = { { "a", "b#" }, { "\n", " ", "#" }, };
299 
300         try (final CSVParser parser = CSVParser.parse(code, format)) {
301             final List<CSVRecord> records = parser.getRecords();
302             assertTrue(records.size() > 0);
303 
304             Utils.compare("Failed to parse without comments", res, records);
305 
306             format = CSVFormat.DEFAULT.withCommentMarker('#');
307         }
308         try (final CSVParser parser = CSVParser.parse(code, format)) {
309             final List<CSVRecord> records = parser.getRecords();
310 
311             Utils.compare("Failed to parse with comments", res_comments, records);
312         }
313     }
314 
315     @Test(expected = IllegalArgumentException.class)
316     public void testDuplicateHeaders() throws Exception {
317         CSVParser.parse("a,b,a\n1,2,3\nx,y,z", CSVFormat.DEFAULT.withHeader(new String[] {}));
318     }
319 
320     @Test
321     public void testEmptyFile() throws Exception {
322         try (final CSVParser parser = CSVParser.parse("", CSVFormat.DEFAULT)) {
323             assertNull(parser.nextRecord());
324         }
325     }
326 
327     @Test
328     public void testEmptyLineBehaviourCSV() throws Exception {
329         final String[] codes = { "hello,\r\n\r\n\r\n", "hello,\n\n\n", "hello,\"\"\r\n\r\n\r\n", "hello,\"\"\n\n\n" };
330         final String[][] res = { { "hello", "" } // CSV format ignores empty lines
331         };
332         for (final String code : codes) {
333             try (final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT)) {
334                 final List<CSVRecord> records = parser.getRecords();
335                 assertEquals(res.length, records.size());
336                 assertTrue(records.size() > 0);
337                 for (int i = 0; i < res.length; i++) {
338                     assertArrayEquals(res[i], records.get(i).values());
339                 }
340             }
341         }
342     }
343 
344     @Test
345     public void testEmptyLineBehaviourExcel() throws Exception {
346         final String[] codes = { "hello,\r\n\r\n\r\n", "hello,\n\n\n", "hello,\"\"\r\n\r\n\r\n", "hello,\"\"\n\n\n" };
347         final String[][] res = { { "hello", "" }, { "" }, // Excel format does not ignore empty lines
348                 { "" } };
349         for (final String code : codes) {
350             try (final CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL)) {
351                 final List<CSVRecord> records = parser.getRecords();
352                 assertEquals(res.length, records.size());
353                 assertTrue(records.size() > 0);
354                 for (int i = 0; i < res.length; i++) {
355                     assertArrayEquals(res[i], records.get(i).values());
356                 }
357             }
358         }
359     }
360 
361     @Test
362     public void testEndOfFileBehaviorCSV() throws Exception {
363         final String[] codes = { "hello,\r\n\r\nworld,\r\n", "hello,\r\n\r\nworld,", "hello,\r\n\r\nworld,\"\"\r\n",
364                 "hello,\r\n\r\nworld,\"\"", "hello,\r\n\r\nworld,\n", "hello,\r\n\r\nworld,",
365                 "hello,\r\n\r\nworld,\"\"\n", "hello,\r\n\r\nworld,\"\"" };
366         final String[][] res = { { "hello", "" }, // CSV format ignores empty lines
367                 { "world", "" } };
368         for (final String code : codes) {
369             try (final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT)) {
370                 final List<CSVRecord> records = parser.getRecords();
371                 assertEquals(res.length, records.size());
372                 assertTrue(records.size() > 0);
373                 for (int i = 0; i < res.length; i++) {
374                     assertArrayEquals(res[i], records.get(i).values());
375                 }
376             }
377         }
378     }
379 
380     @Test
381     public void testEndOfFileBehaviourExcel() throws Exception {
382         final String[] codes = { "hello,\r\n\r\nworld,\r\n", "hello,\r\n\r\nworld,", "hello,\r\n\r\nworld,\"\"\r\n",
383                 "hello,\r\n\r\nworld,\"\"", "hello,\r\n\r\nworld,\n", "hello,\r\n\r\nworld,",
384                 "hello,\r\n\r\nworld,\"\"\n", "hello,\r\n\r\nworld,\"\"" };
385         final String[][] res = { { "hello", "" }, { "" }, // Excel format does not ignore empty lines
386                 { "world", "" } };
387 
388         for (final String code : codes) {
389             try (final CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL)) {
390                 final List<CSVRecord> records = parser.getRecords();
391                 assertEquals(res.length, records.size());
392                 assertTrue(records.size() > 0);
393                 for (int i = 0; i < res.length; i++) {
394                     assertArrayEquals(res[i], records.get(i).values());
395                 }
396             }
397         }
398     }
399 
400     @Test
401     public void testExcelFormat1() throws IOException {
402         final String code = "value1,value2,value3,value4\r\na,b,c,d\r\n  x,,," +
403                 "\r\n\r\n\"\"\"hello\"\"\",\"  \"\"world\"\"\",\"abc\ndef\",\r\n";
404         final String[][] res = { { "value1", "value2", "value3", "value4" }, { "a", "b", "c", "d" },
405                 { "  x", "", "", "" }, { "" }, { "\"hello\"", "  \"world\"", "abc\ndef", "" } };
406         try (final CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL)) {
407             final List<CSVRecord> records = parser.getRecords();
408             assertEquals(res.length, records.size());
409             assertTrue(records.size() > 0);
410             for (int i = 0; i < res.length; i++) {
411                 assertArrayEquals(res[i], records.get(i).values());
412             }
413         }
414     }
415 
416     @Test
417     public void testExcelFormat2() throws Exception {
418         final String code = "foo,baar\r\n\r\nhello,\r\n\r\nworld,\r\n";
419         final String[][] res = { { "foo", "baar" }, { "" }, { "hello", "" }, { "" }, { "world", "" } };
420         try (final CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL)) {
421             final List<CSVRecord> records = parser.getRecords();
422             assertEquals(res.length, records.size());
423             assertTrue(records.size() > 0);
424             for (int i = 0; i < res.length; i++) {
425                 assertArrayEquals(res[i], records.get(i).values());
426             }
427         }
428     }
429 
430     /**
431      * Tests an exported Excel worksheet with a header row and rows that have more columns than the headers
432      */
433     @Test
434     public void testExcelHeaderCountLessThanData() throws Exception {
435         final String code = "A,B,C,,\r\na,b,c,d,e\r\n";
436         try (final CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL.withHeader())) {
437             for (final CSVRecord record : parser.getRecords()) {
438                 Assert.assertEquals("a", record.get("A"));
439                 Assert.assertEquals("b", record.get("B"));
440                 Assert.assertEquals("c", record.get("C"));
441             }
442         }
443     }
444 
445     @Test
446     public void testForEach() throws Exception {
447         final List<CSVRecord> records = new ArrayList<>();
448         try (final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z")) {
449             for (final CSVRecord record : CSVFormat.DEFAULT.parse(in)) {
450                 records.add(record);
451             }
452             assertEquals(3, records.size());
453             assertArrayEquals(new String[] { "a", "b", "c" }, records.get(0).values());
454             assertArrayEquals(new String[] { "1", "2", "3" }, records.get(1).values());
455             assertArrayEquals(new String[] { "x", "y", "z" }, records.get(2).values());
456         }
457     }
458 
459     @Test
460     public void testGetHeaderMap() throws Exception {
461         try (final CSVParser parser = CSVParser.parse("a,b,c\n1,2,3\nx,y,z",
462                 CSVFormat.DEFAULT.withHeader("A", "B", "C"))) {
463             final Map<String, Integer> headerMap = parser.getHeaderMap();
464             final Iterator<String> columnNames = headerMap.keySet().iterator();
465             // Headers are iterated in column order.
466             Assert.assertEquals("A", columnNames.next());
467             Assert.assertEquals("B", columnNames.next());
468             Assert.assertEquals("C", columnNames.next());
469             final Iterator<CSVRecord> records = parser.iterator();
470 
471             // Parse to make sure getHeaderMap did not have a side-effect.
472             for (int i = 0; i < 3; i++) {
473                 assertTrue(records.hasNext());
474                 final CSVRecord record = records.next();
475                 assertEquals(record.get(0), record.get("A"));
476                 assertEquals(record.get(1), record.get("B"));
477                 assertEquals(record.get(2), record.get("C"));
478             }
479 
480             assertFalse(records.hasNext());
481         }
482     }
483 
484     @Test
485     public void testGetLine() throws IOException {
486         try (final CSVParser parser = CSVParser.parse(CSV_INPUT, CSVFormat.DEFAULT.withIgnoreSurroundingSpaces())) {
487             for (final String[] re : RESULT) {
488                 assertArrayEquals(re, parser.nextRecord().values());
489             }
490 
491             assertNull(parser.nextRecord());
492         }
493     }
494 
495     @Test
496     public void testGetLineNumberWithCR() throws Exception {
497         this.validateLineNumbers(String.valueOf(CR));
498     }
499 
500     @Test
501     public void testGetLineNumberWithCRLF() throws Exception {
502         this.validateLineNumbers(CRLF);
503     }
504 
505     @Test
506     public void testGetLineNumberWithLF() throws Exception {
507         this.validateLineNumbers(String.valueOf(LF));
508     }
509 
510     @Test
511     public void testGetOneLine() throws IOException {
512         try (final CSVParser parser = CSVParser.parse(CSV_INPUT_1, CSVFormat.DEFAULT)) {
513             final CSVRecord record = parser.getRecords().get(0);
514             assertArrayEquals(RESULT[0], record.values());
515         }
516     }
517 
518     /**
519      * Tests reusing a parser to process new string records one at a time as they are being discovered. See [CSV-110].
520      *
521      * @throws IOException
522      */
523     @Test
524     public void testGetOneLineOneParser() throws IOException {
525         final CSVFormat format = CSVFormat.DEFAULT;
526         try (final PipedWriter writer = new PipedWriter();
527                 final CSVParser parser = new CSVParser(new PipedReader(writer), format)) {
528             writer.append(CSV_INPUT_1);
529             writer.append(format.getRecordSeparator());
530             final CSVRecord record1 = parser.nextRecord();
531             assertArrayEquals(RESULT[0], record1.values());
532             writer.append(CSV_INPUT_2);
533             writer.append(format.getRecordSeparator());
534             final CSVRecord record2 = parser.nextRecord();
535             assertArrayEquals(RESULT[1], record2.values());
536         }
537     }
538 
539     @Test
540     public void testGetRecordNumberWithCR() throws Exception {
541         this.validateRecordNumbers(String.valueOf(CR));
542     }
543 
544     @Test
545     public void testGetRecordNumberWithCRLF() throws Exception {
546         this.validateRecordNumbers(CRLF);
547     }
548 
549     @Test
550     public void testGetRecordNumberWithLF() throws Exception {
551         this.validateRecordNumbers(String.valueOf(LF));
552     }
553 
554     @Test
555     public void testGetRecordPositionWithCRLF() throws Exception {
556         this.validateRecordPosition(CRLF);
557     }
558 
559     @Test
560     public void testGetRecordPositionWithLF() throws Exception {
561         this.validateRecordPosition(String.valueOf(LF));
562     }
563 
564     @Test
565     public void testGetRecords() throws IOException {
566         try (final CSVParser parser = CSVParser.parse(CSV_INPUT, CSVFormat.DEFAULT.withIgnoreSurroundingSpaces())) {
567             final List<CSVRecord> records = parser.getRecords();
568             assertEquals(RESULT.length, records.size());
569             assertTrue(records.size() > 0);
570             for (int i = 0; i < RESULT.length; i++) {
571                 assertArrayEquals(RESULT[i], records.get(i).values());
572             }
573         }
574     }
575 
576     @Test
577     public void testGetRecordWithMultiLineValues() throws Exception {
578         try (final CSVParser parser = CSVParser.parse(
579                 "\"a\r\n1\",\"a\r\n2\"" + CRLF + "\"b\r\n1\",\"b\r\n2\"" + CRLF + "\"c\r\n1\",\"c\r\n2\"",
580                 CSVFormat.DEFAULT.withRecordSeparator(CRLF))) {
581             CSVRecord record;
582             assertEquals(0, parser.getRecordNumber());
583             assertEquals(0, parser.getCurrentLineNumber());
584             assertNotNull(record = parser.nextRecord());
585             assertEquals(3, parser.getCurrentLineNumber());
586             assertEquals(1, record.getRecordNumber());
587             assertEquals(1, parser.getRecordNumber());
588             assertNotNull(record = parser.nextRecord());
589             assertEquals(6, parser.getCurrentLineNumber());
590             assertEquals(2, record.getRecordNumber());
591             assertEquals(2, parser.getRecordNumber());
592             assertNotNull(record = parser.nextRecord());
593             assertEquals(8, parser.getCurrentLineNumber());
594             assertEquals(3, record.getRecordNumber());
595             assertEquals(3, parser.getRecordNumber());
596             assertNull(record = parser.nextRecord());
597             assertEquals(8, parser.getCurrentLineNumber());
598             assertEquals(3, parser.getRecordNumber());
599         }
600     }
601 
602     @Test
603     public void testHeader() throws Exception {
604         final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z");
605 
606         final Iterator<CSVRecord> records = CSVFormat.DEFAULT.withHeader().parse(in).iterator();
607 
608         for (int i = 0; i < 2; i++) {
609             assertTrue(records.hasNext());
610             final CSVRecord record = records.next();
611             assertEquals(record.get(0), record.get("a"));
612             assertEquals(record.get(1), record.get("b"));
613             assertEquals(record.get(2), record.get("c"));
614         }
615 
616         assertFalse(records.hasNext());
617     }
618 
619     @Test
620     public void testHeaderComment() throws Exception {
621         final Reader in = new StringReader("# comment\na,b,c\n1,2,3\nx,y,z");
622 
623         final Iterator<CSVRecord> records = CSVFormat.DEFAULT.withCommentMarker('#').withHeader().parse(in).iterator();
624 
625         for (int i = 0; i < 2; i++) {
626             assertTrue(records.hasNext());
627             final CSVRecord record = records.next();
628             assertEquals(record.get(0), record.get("a"));
629             assertEquals(record.get(1), record.get("b"));
630             assertEquals(record.get(2), record.get("c"));
631         }
632 
633         assertFalse(records.hasNext());
634     }
635 
636     @Test
637     public void testHeaderMissing() throws Exception {
638         final Reader in = new StringReader("a,,c\n1,2,3\nx,y,z");
639 
640         final Iterator<CSVRecord> records = CSVFormat.DEFAULT.withHeader().parse(in).iterator();
641 
642         for (int i = 0; i < 2; i++) {
643             assertTrue(records.hasNext());
644             final CSVRecord record = records.next();
645             assertEquals(record.get(0), record.get("a"));
646             assertEquals(record.get(2), record.get("c"));
647         }
648 
649         assertFalse(records.hasNext());
650     }
651 
652     @Test
653     public void testHeaderMissingWithNull() throws Exception {
654         final Reader in = new StringReader("a,,c,,d\n1,2,3,4\nx,y,z,zz");
655         CSVFormat.DEFAULT.withHeader().withNullString("").withAllowMissingColumnNames().parse(in).iterator();
656     }
657 
658     @Test
659     public void testHeadersMissing() throws Exception {
660         final Reader in = new StringReader("a,,c,,d\n1,2,3,4\nx,y,z,zz");
661         CSVFormat.DEFAULT.withHeader().withAllowMissingColumnNames().parse(in).iterator();
662     }
663 
664     @Test(expected = IllegalArgumentException.class)
665     public void testHeadersMissingException() throws Exception {
666         final Reader in = new StringReader("a,,c,,d\n1,2,3,4\nx,y,z,zz");
667         CSVFormat.DEFAULT.withHeader().parse(in).iterator();
668     }
669 
670     @Test
671     public void testIgnoreCaseHeaderMapping() throws Exception {
672         final Reader in = new StringReader("1,2,3");
673         final Iterator<CSVRecord> records = CSVFormat.DEFAULT.withHeader("One", "TWO", "three").withIgnoreHeaderCase()
674                 .parse(in).iterator();
675         final CSVRecord record = records.next();
676         assertEquals("1", record.get("one"));
677         assertEquals("2", record.get("two"));
678         assertEquals("3", record.get("THREE"));
679     }
680 
681     @Test
682     public void testIgnoreEmptyLines() throws IOException {
683         final String code = "\nfoo,baar\n\r\n,\n\n,world\r\n\n";
684         // String code = "world\r\n\n";
685         // String code = "foo;baar\r\n\r\nhello;\r\n\r\nworld;\r\n";
686         try (final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT)) {
687             final List<CSVRecord> records = parser.getRecords();
688             assertEquals(3, records.size());
689         }
690     }
691 
692     @Test(expected = IllegalArgumentException.class)
693     public void testInvalidFormat() throws Exception {
694         final CSVFormat invalidFormat = CSVFormat.DEFAULT.withDelimiter(CR);
695         try (final CSVParser parser = new CSVParser(null, invalidFormat)) {
696             Assert.fail("This test should have thrown an exception.");
697         }
698     }
699 
700     @Test
701     public void testIterator() throws Exception {
702         final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z");
703 
704         final Iterator<CSVRecord> iterator = CSVFormat.DEFAULT.parse(in).iterator();
705 
706         assertTrue(iterator.hasNext());
707         try {
708             iterator.remove();
709             fail("expected UnsupportedOperationException");
710         } catch (final UnsupportedOperationException expected) {
711             // expected
712         }
713         assertArrayEquals(new String[] { "a", "b", "c" }, iterator.next().values());
714         assertArrayEquals(new String[] { "1", "2", "3" }, iterator.next().values());
715         assertTrue(iterator.hasNext());
716         assertTrue(iterator.hasNext());
717         assertTrue(iterator.hasNext());
718         assertArrayEquals(new String[] { "x", "y", "z" }, iterator.next().values());
719         assertFalse(iterator.hasNext());
720 
721         try {
722             iterator.next();
723             fail("NoSuchElementException expected");
724         } catch (final NoSuchElementException e) {
725             // expected
726         }
727     }
728 
729     @Test
730     public void testLineFeedEndings() throws IOException {
731         final String code = "foo\nbaar,\nhello,world\n,kanu";
732         try (final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT)) {
733             final List<CSVRecord> records = parser.getRecords();
734             assertEquals(4, records.size());
735         }
736     }
737 
738     @Test
739     public void testMappedButNotSetAsOutlook2007ContactExport() throws Exception {
740         final Reader in = new StringReader("a,b,c\n1,2\nx,y,z");
741         final Iterator<CSVRecord> records = CSVFormat.DEFAULT.withHeader("A", "B", "C").withSkipHeaderRecord().parse(in)
742                 .iterator();
743         CSVRecord record;
744 
745         // 1st record
746         record = records.next();
747         assertTrue(record.isMapped("A"));
748         assertTrue(record.isMapped("B"));
749         assertTrue(record.isMapped("C"));
750         assertTrue(record.isSet("A"));
751         assertTrue(record.isSet("B"));
752         assertFalse(record.isSet("C"));
753         assertEquals("1", record.get("A"));
754         assertEquals("2", record.get("B"));
755         assertFalse(record.isConsistent());
756 
757         // 2nd record
758         record = records.next();
759         assertTrue(record.isMapped("A"));
760         assertTrue(record.isMapped("B"));
761         assertTrue(record.isMapped("C"));
762         assertTrue(record.isSet("A"));
763         assertTrue(record.isSet("B"));
764         assertTrue(record.isSet("C"));
765         assertEquals("x", record.get("A"));
766         assertEquals("y", record.get("B"));
767         assertEquals("z", record.get("C"));
768         assertTrue(record.isConsistent());
769 
770         assertFalse(records.hasNext());
771     }
772 
773     @Test
774     // TODO this may lead to strange behavior, throw an exception if iterator() has already been called?
775     public void testMultipleIterators() throws Exception {
776         try (final CSVParser parser = CSVParser.parse("a,b,c" + CR + "d,e,f", CSVFormat.DEFAULT)) {
777             final Iterator<CSVRecord> itr1 = parser.iterator();
778             final Iterator<CSVRecord> itr2 = parser.iterator();
779 
780             final CSVRecord first = itr1.next();
781             assertEquals("a", first.get(0));
782             assertEquals("b", first.get(1));
783             assertEquals("c", first.get(2));
784 
785             final CSVRecord second = itr2.next();
786             assertEquals("d", second.get(0));
787             assertEquals("e", second.get(1));
788             assertEquals("f", second.get(2));
789         }
790     }
791 
792     @Test(expected = IllegalArgumentException.class)
793     public void testNewCSVParserNullReaderFormat() throws Exception {
794         try (final CSVParser parser = new CSVParser(null, CSVFormat.DEFAULT)) {
795             Assert.fail("This test should have thrown an exception.");
796         }
797     }
798 
799     @Test(expected = IllegalArgumentException.class)
800     public void testNewCSVParserReaderNullFormat() throws Exception {
801         try (final CSVParser parser = new CSVParser(new StringReader(""), null)) {
802             Assert.fail("This test should have thrown an exception.");
803         }
804     }
805 
806     @Test
807     public void testNoHeaderMap() throws Exception {
808         try (final CSVParser parser = CSVParser.parse("a,b,c\n1,2,3\nx,y,z", CSVFormat.DEFAULT)) {
809             Assert.assertNull(parser.getHeaderMap());
810         }
811     }
812 
813     @Test(expected = IllegalArgumentException.class)
814     public void testParseFileNullFormat() throws Exception {
815         CSVParser.parse(new File(""), Charset.defaultCharset(), null);
816     }
817 
818     @Test(expected = IllegalArgumentException.class)
819     public void testParseNullFileFormat() throws Exception {
820         CSVParser.parse((File) null, Charset.defaultCharset(), CSVFormat.DEFAULT);
821     }
822 
823     @Test(expected = IllegalArgumentException.class)
824     public void testParseNullStringFormat() throws Exception {
825         CSVParser.parse((String) null, CSVFormat.DEFAULT);
826     }
827 
828     @Test(expected = IllegalArgumentException.class)
829     public void testParseNullUrlCharsetFormat() throws Exception {
830         CSVParser.parse((File) null, Charset.defaultCharset(), CSVFormat.DEFAULT);
831     }
832 
833     @Test(expected = IllegalArgumentException.class)
834     public void testParserUrlNullCharsetFormat() throws Exception {
835         try (final CSVParser parser = CSVParser.parse(new URL("http://commons.apache.org"), null, CSVFormat.DEFAULT)) {
836             Assert.fail("This test should have thrown an exception.");
837         }
838     }
839 
840     @Test(expected = IllegalArgumentException.class)
841     public void testParseStringNullFormat() throws Exception {
842         CSVParser.parse("csv data", null);
843     }
844 
845     @Test(expected = IllegalArgumentException.class)
846     public void testParseUrlCharsetNullFormat() throws Exception {
847         try (final CSVParser parser = CSVParser.parse(new URL("http://commons.apache.org"), Charset.defaultCharset(), null)) {
848             Assert.fail("This test should have thrown an exception.");
849         }
850     }
851 
852     @Test
853     public void testProvidedHeader() throws Exception {
854         final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z");
855 
856         final Iterator<CSVRecord> records = CSVFormat.DEFAULT.withHeader("A", "B", "C").parse(in).iterator();
857 
858         for (int i = 0; i < 3; i++) {
859             assertTrue(records.hasNext());
860             final CSVRecord record = records.next();
861             assertTrue(record.isMapped("A"));
862             assertTrue(record.isMapped("B"));
863             assertTrue(record.isMapped("C"));
864             assertFalse(record.isMapped("NOT MAPPED"));
865             assertEquals(record.get(0), record.get("A"));
866             assertEquals(record.get(1), record.get("B"));
867             assertEquals(record.get(2), record.get("C"));
868         }
869 
870         assertFalse(records.hasNext());
871     }
872 
873     @Test
874     public void testProvidedHeaderAuto() throws Exception {
875         final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z");
876 
877         final Iterator<CSVRecord> records = CSVFormat.DEFAULT.withHeader().parse(in).iterator();
878 
879         for (int i = 0; i < 2; i++) {
880             assertTrue(records.hasNext());
881             final CSVRecord record = records.next();
882             assertTrue(record.isMapped("a"));
883             assertTrue(record.isMapped("b"));
884             assertTrue(record.isMapped("c"));
885             assertFalse(record.isMapped("NOT MAPPED"));
886             assertEquals(record.get(0), record.get("a"));
887             assertEquals(record.get(1), record.get("b"));
888             assertEquals(record.get(2), record.get("c"));
889         }
890 
891         assertFalse(records.hasNext());
892     }
893 
894     @Test
895     public void testRoundtrip() throws Exception {
896         final StringWriter out = new StringWriter();
897         try (final CSVPrinter printer = new CSVPrinter(out, CSVFormat.DEFAULT)) {
898             final String input = "a,b,c\r\n1,2,3\r\nx,y,z\r\n";
899             for (final CSVRecord record : CSVParser.parse(input, CSVFormat.DEFAULT)) {
900                 printer.printRecord(record);
901             }
902             assertEquals(input, out.toString());
903         }
904     }
905 
906     @Test
907     public void testSkipAutoHeader() throws Exception {
908         final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z");
909         final Iterator<CSVRecord> records = CSVFormat.DEFAULT.withHeader().parse(in).iterator();
910         final CSVRecord record = records.next();
911         assertEquals("1", record.get("a"));
912         assertEquals("2", record.get("b"));
913         assertEquals("3", record.get("c"));
914     }
915 
916     @Test
917     public void testSkipHeaderOverrideDuplicateHeaders() throws Exception {
918         final Reader in = new StringReader("a,a,a\n1,2,3\nx,y,z");
919         final Iterator<CSVRecord> records = CSVFormat.DEFAULT.withHeader("X", "Y", "Z").withSkipHeaderRecord().parse(in)
920                 .iterator();
921         final CSVRecord record = records.next();
922         assertEquals("1", record.get("X"));
923         assertEquals("2", record.get("Y"));
924         assertEquals("3", record.get("Z"));
925     }
926 
927     @Test
928     public void testSkipSetAltHeaders() throws Exception {
929         final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z");
930         final Iterator<CSVRecord> records = CSVFormat.DEFAULT.withHeader("X", "Y", "Z").withSkipHeaderRecord().parse(in)
931                 .iterator();
932         final CSVRecord record = records.next();
933         assertEquals("1", record.get("X"));
934         assertEquals("2", record.get("Y"));
935         assertEquals("3", record.get("Z"));
936     }
937 
938     @Test
939     public void testSkipSetHeader() throws Exception {
940         final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z");
941         final Iterator<CSVRecord> records = CSVFormat.DEFAULT.withHeader("a", "b", "c").withSkipHeaderRecord().parse(in)
942                 .iterator();
943         final CSVRecord record = records.next();
944         assertEquals("1", record.get("a"));
945         assertEquals("2", record.get("b"));
946         assertEquals("3", record.get("c"));
947     }
948 
949     @Test
950     @Ignore
951     public void testStartWithEmptyLinesThenHeaders() throws Exception {
952         final String[] codes = { "\r\n\r\n\r\nhello,\r\n\r\n\r\n", "hello,\n\n\n", "hello,\"\"\r\n\r\n\r\n",
953                 "hello,\"\"\n\n\n" };
954         final String[][] res = { { "hello", "" }, { "" }, // Excel format does not ignore empty lines
955                 { "" } };
956         for (final String code : codes) {
957             try (final CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL)) {
958                 final List<CSVRecord> records = parser.getRecords();
959                 assertEquals(res.length, records.size());
960                 assertTrue(records.size() > 0);
961                 for (int i = 0; i < res.length; i++) {
962                     assertArrayEquals(res[i], records.get(i).values());
963                 }
964             }
965         }
966     }
967 
968     @Test
969     public void testTrailingDelimiter() throws Exception {
970         final Reader in = new StringReader("a,a,a,\n\"1\",\"2\",\"3\",\nx,y,z,");
971         final Iterator<CSVRecord> records = CSVFormat.DEFAULT.withHeader("X", "Y", "Z").withSkipHeaderRecord()
972                 .withTrailingDelimiter().parse(in).iterator();
973         final CSVRecord record = records.next();
974         assertEquals("1", record.get("X"));
975         assertEquals("2", record.get("Y"));
976         assertEquals("3", record.get("Z"));
977         Assert.assertEquals(3, record.size());
978     }
979 
980     @Test
981     public void testTrim() throws Exception {
982         final Reader in = new StringReader("a,a,a\n\" 1 \",\" 2 \",\" 3 \"\nx,y,z");
983         final Iterator<CSVRecord> records = CSVFormat.DEFAULT.withHeader("X", "Y", "Z").withSkipHeaderRecord()
984                 .withTrim().parse(in).iterator();
985         final CSVRecord record = records.next();
986         assertEquals("1", record.get("X"));
987         assertEquals("2", record.get("Y"));
988         assertEquals("3", record.get("Z"));
989         Assert.assertEquals(3, record.size());
990     }
991 
992     private void validateLineNumbers(final String lineSeparator) throws IOException {
993         try (final CSVParser parser = CSVParser.parse("a" + lineSeparator + "b" + lineSeparator + "c",
994                 CSVFormat.DEFAULT.withRecordSeparator(lineSeparator))) {
995             assertEquals(0, parser.getCurrentLineNumber());
996             assertNotNull(parser.nextRecord());
997             assertEquals(1, parser.getCurrentLineNumber());
998             assertNotNull(parser.nextRecord());
999             assertEquals(2, parser.getCurrentLineNumber());
1000             assertNotNull(parser.nextRecord());
1001             // Still 2 because the last line is does not have EOL chars
1002             assertEquals(2, parser.getCurrentLineNumber());
1003             assertNull(parser.nextRecord());
1004             // Still 2 because the last line is does not have EOL chars
1005             assertEquals(2, parser.getCurrentLineNumber());
1006         }
1007     }
1008 
1009     private void validateRecordNumbers(final String lineSeparator) throws IOException {
1010         try (final CSVParser parser = CSVParser.parse("a" + lineSeparator + "b" + lineSeparator + "c",
1011                 CSVFormat.DEFAULT.withRecordSeparator(lineSeparator))) {
1012             CSVRecord record;
1013             assertEquals(0, parser.getRecordNumber());
1014             assertNotNull(record = parser.nextRecord());
1015             assertEquals(1, record.getRecordNumber());
1016             assertEquals(1, parser.getRecordNumber());
1017             assertNotNull(record = parser.nextRecord());
1018             assertEquals(2, record.getRecordNumber());
1019             assertEquals(2, parser.getRecordNumber());
1020             assertNotNull(record = parser.nextRecord());
1021             assertEquals(3, record.getRecordNumber());
1022             assertEquals(3, parser.getRecordNumber());
1023             assertNull(record = parser.nextRecord());
1024             assertEquals(3, parser.getRecordNumber());
1025         }
1026     }
1027 
1028     private void validateRecordPosition(final String lineSeparator) throws IOException {
1029         final String nl = lineSeparator; // used as linebreak in values for better distinction
1030 
1031         final String code = "a,b,c" + lineSeparator + "1,2,3" + lineSeparator +
1032                 // to see if recordPosition correctly points to the enclosing quote
1033                 "'A" + nl + "A','B" + nl + "B',CC" + lineSeparator +
1034                 // unicode test... not very relevant while operating on strings instead of bytes, but for
1035                 // completeness...
1036                 "\u00c4,\u00d6,\u00dc" + lineSeparator + "EOF,EOF,EOF";
1037 
1038         final CSVFormat format = CSVFormat.newFormat(',').withQuote('\'').withRecordSeparator(lineSeparator);
1039         CSVParser parser = CSVParser.parse(code, format);
1040 
1041         CSVRecord record;
1042         assertEquals(0, parser.getRecordNumber());
1043 
1044         assertNotNull(record = parser.nextRecord());
1045         assertEquals(1, record.getRecordNumber());
1046         assertEquals(code.indexOf('a'), record.getCharacterPosition());
1047 
1048         assertNotNull(record = parser.nextRecord());
1049         assertEquals(2, record.getRecordNumber());
1050         assertEquals(code.indexOf('1'), record.getCharacterPosition());
1051 
1052         assertNotNull(record = parser.nextRecord());
1053         final long positionRecord3 = record.getCharacterPosition();
1054         assertEquals(3, record.getRecordNumber());
1055         assertEquals(code.indexOf("'A"), record.getCharacterPosition());
1056         assertEquals("A" + lineSeparator + "A", record.get(0));
1057         assertEquals("B" + lineSeparator + "B", record.get(1));
1058         assertEquals("CC", record.get(2));
1059 
1060         assertNotNull(record = parser.nextRecord());
1061         assertEquals(4, record.getRecordNumber());
1062         assertEquals(code.indexOf('\u00c4'), record.getCharacterPosition());
1063 
1064         assertNotNull(record = parser.nextRecord());
1065         assertEquals(5, record.getRecordNumber());
1066         assertEquals(code.indexOf("EOF"), record.getCharacterPosition());
1067 
1068         parser.close();
1069 
1070         // now try to read starting at record 3
1071         parser = new CSVParser(new StringReader(code.substring((int) positionRecord3)), format, positionRecord3, 3);
1072 
1073         assertNotNull(record = parser.nextRecord());
1074         assertEquals(3, record.getRecordNumber());
1075         assertEquals(code.indexOf("'A"), record.getCharacterPosition());
1076         assertEquals("A" + lineSeparator + "A", record.get(0));
1077         assertEquals("B" + lineSeparator + "B", record.get(1));
1078         assertEquals("CC", record.get(2));
1079 
1080         assertNotNull(record = parser.nextRecord());
1081         assertEquals(4, record.getRecordNumber());
1082         assertEquals(code.indexOf('\u00c4'), record.getCharacterPosition());
1083         assertEquals("\u00c4", record.get(0));
1084 
1085         parser.close();
1086     }
1087 }