View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.csv;
19  
20  import static org.apache.commons.csv.Token.Type.TOKEN;
21  
22  import java.io.Closeable;
23  import java.io.File;
24  import java.io.IOException;
25  import java.io.InputStream;
26  import java.io.InputStreamReader;
27  import java.io.Reader;
28  import java.io.StringReader;
29  import java.io.UncheckedIOException;
30  import java.net.URL;
31  import java.nio.charset.Charset;
32  import java.nio.file.Files;
33  import java.nio.file.Path;
34  import java.util.ArrayList;
35  import java.util.Arrays;
36  import java.util.Collections;
37  import java.util.Iterator;
38  import java.util.LinkedHashMap;
39  import java.util.List;
40  import java.util.Map;
41  import java.util.NoSuchElementException;
42  import java.util.Objects;
43  import java.util.Spliterator;
44  import java.util.Spliterators;
45  import java.util.TreeMap;
46  import java.util.stream.Collectors;
47  import java.util.stream.Stream;
48  import java.util.stream.StreamSupport;
49  
50  /**
51   * Parses CSV files according to the specified format.
52   *
53   * Because CSV appears in many different dialects, the parser supports many formats by allowing the
54   * specification of a {@link CSVFormat}.
55   *
56   * The parser works record wise. It is not possible to go back, once a record has been parsed from the input stream.
57   *
58   * <h2>Creating instances</h2>
59   * <p>
60   * There are several static factory methods that can be used to create instances for various types of resources:
61   * </p>
62   * <ul>
63   *     <li>{@link #parse(java.io.File, Charset, CSVFormat)}</li>
64   *     <li>{@link #parse(String, CSVFormat)}</li>
65   *     <li>{@link #parse(java.net.URL, java.nio.charset.Charset, CSVFormat)}</li>
66   * </ul>
67   * <p>
68   * Alternatively parsers can also be created by passing a {@link Reader} directly to one of the constructors.
69   *
70   * For those who like fluent APIs, parsers can be created using {@link CSVFormat#parse(java.io.Reader)} as a shortcut:
71   * </p>
72   * <pre>
73   * for(CSVRecord record : CSVFormat.EXCEL.parse(in)) {
74   *     ...
75   * }
76   * </pre>
77   *
78   * <h2>Parsing record wise</h2>
79   * <p>
80   * To parse a CSV input from a file, you write:
81   * </p>
82   *
83   * <pre>
84   * File csvData = new File(&quot;/path/to/csv&quot;);
85   * CSVParser parser = CSVParser.parse(csvData, StandardCharsets.UTF_8, CSVFormat.RFC4180);
86   * for (CSVRecord csvRecord : parser) {
87   *     ...
88   * }
89   * </pre>
90   *
91   * <p>
92   * This will read and parse the contents of the file using the
93   * <a href="http://tools.ietf.org/html/rfc4180" target="_blank">RFC 4180</a> format.
94   * </p>
95   *
96   * <p>
97   * To parse CSV input in a format like Excel, you write:
98   * </p>
99   *
100  * <pre>
101  * CSVParser parser = CSVParser.parse(csvData, StandardCharsets.UTF_8, CSVFormat.EXCEL);
102  * for (CSVRecord csvRecord : parser) {
103  *     ...
104  * }
105  * </pre>
106  *
107  * <p>
108  * If the predefined formats don't match the format at hand, custom formats can be defined. More information about
109  * customising CSVFormats is available in {@link CSVFormat CSVFormat Javadoc}.
110  * </p>
111  *
112  * <h2>Parsing into memory</h2>
113  * <p>
114  * If parsing record wise is not desired, the contents of the input can be read completely into memory.
115  * </p>
116  *
117  * <pre>
118  * Reader in = new StringReader(&quot;a;b\nc;d&quot;);
119  * CSVParser parser = new CSVParser(in, CSVFormat.EXCEL);
120  * List&lt;CSVRecord&gt; list = parser.getRecords();
121  * </pre>
122  *
123  * <p>
124  * There are two constraints that have to be kept in mind:
125  * </p>
126  *
127  * <ol>
128  *     <li>Parsing into memory starts at the current position of the parser. If you have already parsed records from
129  *     the input, those records will not end up in the in memory representation of your CSV data.</li>
130  *     <li>Parsing into memory may consume a lot of system resources depending on the input. For example if you're
131  *     parsing a 150MB file of CSV data the contents will be read completely into memory.</li>
132  * </ol>
133  *
134  * <h2>Notes</h2>
135  * <p>
136  * Internal parser state is completely covered by the format and the reader-state.
137  * </p>
138  *
139  * @see <a href="package-summary.html">package documentation for more details</a>
140  */
141 public final class CSVParser implements Iterable<CSVRecord>, Closeable {
142 
143     class CSVRecordIterator implements Iterator<CSVRecord> {
144         private CSVRecord current;
145 
146         private CSVRecord getNextRecord() {
147             try {
148                 return CSVParser.this.nextRecord();
149             } catch (final IOException e) {
150                 throw new UncheckedIOException(e.getClass().getSimpleName() + " reading next record: " + e.toString(), e);
151             }
152         }
153 
154         @Override
155         public boolean hasNext() {
156             if (CSVParser.this.isClosed()) {
157                 return false;
158             }
159             if (this.current == null) {
160                 this.current = this.getNextRecord();
161             }
162 
163             return this.current != null;
164         }
165 
166         @Override
167         public CSVRecord next() {
168             if (CSVParser.this.isClosed()) {
169                 throw new NoSuchElementException("CSVParser has been closed");
170             }
171             CSVRecord next = this.current;
172             this.current = null;
173 
174             if (next == null) {
175                 // hasNext() wasn't called before
176                 next = this.getNextRecord();
177                 if (next == null) {
178                     throw new NoSuchElementException("No more CSV records available");
179                 }
180             }
181 
182             return next;
183         }
184 
185         @Override
186         public void remove() {
187             throw new UnsupportedOperationException();
188         }
189     }
190 
191     /**
192      * Header information based on name and position.
193      */
194     private static final class Headers {
195 
196         /**
197          * Header column positions (0-based)
198          */
199         final Map<String, Integer> headerMap;
200 
201         /**
202          * Header names in column order
203          */
204         final List<String> headerNames;
205 
206         Headers(final Map<String, Integer> headerMap, final List<String> headerNames) {
207             this.headerMap = headerMap;
208             this.headerNames = headerNames;
209         }
210     }
211 
212     /**
213      * Creates a parser for the given {@link File}.
214      *
215      * @param file
216      *            a CSV file. Must not be null.
217      * @param charset
218      *            The Charset to decode the given file.
219      * @param format
220      *            the CSVFormat used for CSV parsing. Must not be null.
221      * @return a new parser
222      * @throws IllegalArgumentException
223      *             If the parameters of the format are inconsistent or if either file or format are null.
224      * @throws IOException
225      *             If an I/O error occurs
226      */
227     public static CSVParser parse(final File file, final Charset charset, final CSVFormat format) throws IOException {
228         Objects.requireNonNull(file, "file");
229         return parse(file.toPath(), charset, format);
230     }
231 
232     /**
233      * Creates a CSV parser using the given {@link CSVFormat}.
234      *
235      * <p>
236      * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser,
237      * unless you close the {@code reader}.
238      * </p>
239      *
240      * @param inputStream
241      *            an InputStream containing CSV-formatted input. Must not be null.
242      * @param charset
243      *            The Charset to decode the given file.
244      * @param format
245      *            the CSVFormat used for CSV parsing. Must not be null.
246      * @return a new CSVParser configured with the given reader and format.
247      * @throws IllegalArgumentException
248      *             If the parameters of the format are inconsistent or if either reader or format are null.
249      * @throws IOException
250      *             If there is a problem reading the header or skipping the first record
251      * @since 1.5
252      */
253     @SuppressWarnings("resource")
254     public static CSVParser parse(final InputStream inputStream, final Charset charset, final CSVFormat format)
255             throws IOException {
256         Objects.requireNonNull(inputStream, "inputStream");
257         Objects.requireNonNull(format, "format");
258         return parse(new InputStreamReader(inputStream, charset), format);
259     }
260 
261     /**
262      * Creates and returns a parser for the given {@link Path}, which the caller MUST close.
263      *
264      * @param path
265      *            a CSV file. Must not be null.
266      * @param charset
267      *            The Charset to decode the given file.
268      * @param format
269      *            the CSVFormat used for CSV parsing. Must not be null.
270      * @return a new parser
271      * @throws IllegalArgumentException
272      *             If the parameters of the format are inconsistent or if either file or format are null.
273      * @throws IOException
274      *             If an I/O error occurs
275      * @since 1.5
276      */
277     @SuppressWarnings("resource")
278     public static CSVParser parse(final Path path, final Charset charset, final CSVFormat format) throws IOException {
279         Objects.requireNonNull(path, "path");
280         Objects.requireNonNull(format, "format");
281         return parse(Files.newInputStream(path), charset, format);
282     }
283 
284     /**
285      * Creates a CSV parser using the given {@link CSVFormat}
286      *
287      * <p>
288      * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser,
289      * unless you close the {@code reader}.
290      * </p>
291      *
292      * @param reader
293      *            a Reader containing CSV-formatted input. Must not be null.
294      * @param format
295      *            the CSVFormat used for CSV parsing. Must not be null.
296      * @return a new CSVParser configured with the given reader and format.
297      * @throws IllegalArgumentException
298      *             If the parameters of the format are inconsistent or if either reader or format are null.
299      * @throws IOException
300      *             If there is a problem reading the header or skipping the first record
301      * @since 1.5
302      */
303     public static CSVParser parse(final Reader reader, final CSVFormat format) throws IOException {
304         return new CSVParser(reader, format);
305     }
306 
307     // the following objects are shared to reduce garbage
308 
309     /**
310      * Creates a parser for the given {@link String}.
311      *
312      * @param string
313      *            a CSV string. Must not be null.
314      * @param format
315      *            the CSVFormat used for CSV parsing. Must not be null.
316      * @return a new parser
317      * @throws IllegalArgumentException
318      *             If the parameters of the format are inconsistent or if either string or format are null.
319      * @throws IOException
320      *             If an I/O error occurs
321      */
322     public static CSVParser parse(final String string, final CSVFormat format) throws IOException {
323         Objects.requireNonNull(string, "string");
324         Objects.requireNonNull(format, "format");
325 
326         return new CSVParser(new StringReader(string), format);
327     }
328 
329     /**
330      * Creates and returns a parser for the given URL, which the caller MUST close.
331      *
332      * <p>
333      * If you do not read all records from the given {@code url}, you should call {@link #close()} on the parser, unless
334      * you close the {@code url}.
335      * </p>
336      *
337      * @param url
338      *            a URL. Must not be null.
339      * @param charset
340      *            the charset for the resource. Must not be null.
341      * @param format
342      *            the CSVFormat used for CSV parsing. Must not be null.
343      * @return a new parser
344      * @throws IllegalArgumentException
345      *             If the parameters of the format are inconsistent or if either url, charset or format are null.
346      * @throws IOException
347      *             If an I/O error occurs
348      */
349     @SuppressWarnings("resource")
350     public static CSVParser parse(final URL url, final Charset charset, final CSVFormat format) throws IOException {
351         Objects.requireNonNull(url, "url");
352         Objects.requireNonNull(charset, "charset");
353         Objects.requireNonNull(format, "format");
354 
355         return new CSVParser(new InputStreamReader(url.openStream(), charset), format);
356     }
357 
358     private String headerComment;
359 
360     private String trailerComment;
361 
362     private final CSVFormat format;
363 
364     private final Headers headers;
365 
366     private final Lexer lexer;
367 
368     private final CSVRecordIterator csvRecordIterator;
369 
370     /** A record buffer for getRecord(). Grows as necessary and is reused. */
371     private final List<String> recordList = new ArrayList<>();
372 
373     /**
374      * The next record number to assign.
375      */
376     private long recordNumber;
377 
378     /**
379      * Lexer offset when the parser does not start parsing at the beginning of the source. Usually used in combination
380      * with {@link #recordNumber}.
381      */
382     private final long characterOffset;
383 
384     private final Token reusableToken = new Token();
385 
386     /**
387      * Constructs a new instance using the given {@link CSVFormat}
388      *
389      * <p>
390      * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser,
391      * unless you close the {@code reader}.
392      * </p>
393      *
394      * @param reader
395      *            a Reader containing CSV-formatted input. Must not be null.
396      * @param format
397      *            the CSVFormat used for CSV parsing. Must not be null.
398      * @throws IllegalArgumentException
399      *             If the parameters of the format are inconsistent or if either reader or format are null.
400      * @throws IOException
401      *             If there is a problem reading the header or skipping the first record
402      */
403     public CSVParser(final Reader reader, final CSVFormat format) throws IOException {
404         this(reader, format, 0, 1);
405     }
406 
407     /**
408      * Constructs a new instance using the given {@link CSVFormat}
409      *
410      * <p>
411      * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser,
412      * unless you close the {@code reader}.
413      * </p>
414      *
415      * @param reader
416      *            a Reader containing CSV-formatted input. Must not be null.
417      * @param format
418      *            the CSVFormat used for CSV parsing. Must not be null.
419      * @param characterOffset
420      *            Lexer offset when the parser does not start parsing at the beginning of the source.
421      * @param recordNumber
422      *            The next record number to assign
423      * @throws IllegalArgumentException
424      *             If the parameters of the format are inconsistent or if either reader or format are null.
425      * @throws IOException
426      *             If there is a problem reading the header or skipping the first record
427      * @since 1.1
428      */
429     @SuppressWarnings("resource")
430     public CSVParser(final Reader reader, final CSVFormat format, final long characterOffset, final long recordNumber)
431         throws IOException {
432         Objects.requireNonNull(reader, "reader");
433         Objects.requireNonNull(format, "format");
434 
435         this.format = format.copy();
436         this.lexer = new Lexer(format, new ExtendedBufferedReader(reader));
437         this.csvRecordIterator = new CSVRecordIterator();
438         this.headers = createHeaders();
439         this.characterOffset = characterOffset;
440         this.recordNumber = recordNumber - 1;
441     }
442 
443     private void addRecordValue(final boolean lastRecord) {
444         final String input = this.format.trim(this.reusableToken.content.toString());
445         if (lastRecord && input.isEmpty() && this.format.getTrailingDelimiter()) {
446             return;
447         }
448         this.recordList.add(handleNull(input));
449     }
450 
451     /**
452      * Closes resources.
453      *
454      * @throws IOException
455      *             If an I/O error occurs
456      */
457     @Override
458     public void close() throws IOException {
459         if (this.lexer != null) {
460             this.lexer.close();
461         }
462     }
463 
464     private Map<String, Integer> createEmptyHeaderMap() {
465         return this.format.getIgnoreHeaderCase() ?
466                 new TreeMap<>(String.CASE_INSENSITIVE_ORDER) :
467                 new LinkedHashMap<>();
468     }
469 
470     /**
471      * Creates the name to index mapping if the format defines a header.
472      *
473      * @return null if the format has no header.
474      * @throws IOException if there is a problem reading the header or skipping the first record
475      */
476     private Headers createHeaders() throws IOException {
477         Map<String, Integer> hdrMap = null;
478         List<String> headerNames = null;
479         final String[] formatHeader = this.format.getHeader();
480         if (formatHeader != null) {
481             hdrMap = createEmptyHeaderMap();
482             String[] headerRecord = null;
483             if (formatHeader.length == 0) {
484                 // read the header from the first line of the file
485                 final CSVRecord nextRecord = this.nextRecord();
486                 if (nextRecord != null) {
487                     headerRecord = nextRecord.values();
488                     headerComment = nextRecord.getComment();
489                 }
490             } else {
491                 if (this.format.getSkipHeaderRecord()) {
492                     final CSVRecord nextRecord = this.nextRecord();
493                     if (nextRecord != null) {
494                         headerComment = nextRecord.getComment();
495                     }
496                 }
497                 headerRecord = formatHeader;
498             }
499 
500             // build the name to index mappings
501             if (headerRecord != null) {
502                 // Track an occurrence of a null, empty or blank header.
503                 boolean observedMissing = false;
504                 for (int i = 0; i < headerRecord.length; i++) {
505                     final String header = headerRecord[i];
506                     final boolean blankHeader = CSVFormat.isBlank(header);
507                     if (blankHeader && !this.format.getAllowMissingColumnNames()) {
508                         throw new IllegalArgumentException(
509                             "A header name is missing in " + Arrays.toString(headerRecord));
510                     }
511 
512                     final boolean containsHeader = blankHeader ? observedMissing : hdrMap.containsKey(header);
513                     final DuplicateHeaderMode headerMode = this.format.getDuplicateHeaderMode();
514                     final boolean duplicatesAllowed = headerMode == DuplicateHeaderMode.ALLOW_ALL;
515                     final boolean emptyDuplicatesAllowed = headerMode == DuplicateHeaderMode.ALLOW_EMPTY;
516 
517                     if (containsHeader && !duplicatesAllowed && !(blankHeader && emptyDuplicatesAllowed)) {
518                         throw new IllegalArgumentException(
519                             String.format(
520                                 "The header contains a duplicate name: \"%s\" in %s. If this is valid then use CSVFormat.Builder.setDuplicateHeaderMode().",
521                                 header, Arrays.toString(headerRecord)));
522                     }
523                     observedMissing |= blankHeader;
524                     if (header != null) {
525                         hdrMap.put(header, Integer.valueOf(i));
526                         if (headerNames == null) {
527                             headerNames = new ArrayList<>(headerRecord.length);
528                         }
529                         headerNames.add(header);
530                     }
531                 }
532             }
533         }
534         if (headerNames == null) {
535             headerNames = Collections.emptyList(); // immutable
536         } else {
537             headerNames = Collections.unmodifiableList(headerNames);
538         }
539         return new Headers(hdrMap, headerNames);
540     }
541 
542     /**
543      * Gets the current line number in the input stream.
544      *
545      * <p>
546      * <strong>ATTENTION:</strong> If your CSV input has multi-line values, the returned number does not correspond to
547      * the record number.
548      * </p>
549      *
550      * @return current line number
551      */
552     public long getCurrentLineNumber() {
553         return this.lexer.getCurrentLineNumber();
554     }
555 
556     /**
557      * Gets the first end-of-line string encountered.
558      *
559      * @return the first end-of-line string
560      * @since 1.5
561      */
562     public String getFirstEndOfLine() {
563         return lexer.getFirstEol();
564     }
565 
566     /**
567      * Gets the header comment, if any.
568      * The header comment appears before the header record.
569      *
570      * @return the header comment for this stream, or null if no comment is available.
571      * @since 1.10.0
572      */
573     public String getHeaderComment() {
574         return headerComment;
575     }
576 
577     /**
578      * Gets a copy of the header map as defined in the CSVFormat's header.
579      * <p>
580      * The map keys are column names. The map values are 0-based indices.
581      * </p>
582      * <p>
583      * Note: The map can only provide a one-to-one mapping when the format did not
584      * contain null or duplicate column names.
585      * </p>
586      *
587      * @return a copy of the header map.
588      */
589     public Map<String, Integer> getHeaderMap() {
590         if (this.headers.headerMap == null) {
591             return null;
592         }
593         final Map<String, Integer> map = createEmptyHeaderMap();
594         map.putAll(this.headers.headerMap);
595         return map;
596     }
597 
598     /**
599      * Gets the underlying header map.
600      *
601      * @return the underlying header map.
602      */
603     Map<String, Integer> getHeaderMapRaw() {
604         return this.headers.headerMap;
605     }
606 
607     /**
608      * Gets a read-only list of header names that iterates in column order as defined in the CSVFormat's header.
609      * <p>
610      * Note: The list provides strings that can be used as keys in the header map.
611      * The list will not contain null column names if they were present in the input
612      * format.
613      * </p>
614      *
615      * @return read-only list of header names that iterates in column order.
616      * @see #getHeaderMap()
617      * @since 1.7
618      */
619     public List<String> getHeaderNames() {
620         return Collections.unmodifiableList(headers.headerNames);
621     }
622 
623     /**
624      * Gets the current record number in the input stream.
625      *
626      * <p>
627      * <strong>ATTENTION:</strong> If your CSV input has multi-line values, the returned number does not correspond to
628      * the line number.
629      * </p>
630      *
631      * @return current record number
632      */
633     public long getRecordNumber() {
634         return this.recordNumber;
635     }
636 
637     /**
638      * Parses the CSV input according to the given format and returns the content as a list of
639      * {@link CSVRecord CSVRecords}.
640      *
641      * <p>
642      * The returned content starts at the current parse-position in the stream.
643      * </p>
644      *
645      * @return list of {@link CSVRecord CSVRecords}, may be empty
646      * @throws UncheckedIOException
647      *             on parse error or input read-failure
648      */
649     public List<CSVRecord> getRecords() {
650         return stream().collect(Collectors.toList());
651     }
652 
653     /**
654      * Gets the trailer comment, if any.
655      * Trailer comments are located between the last record and EOF
656      *
657      * @return the trailer comment for this stream, or null if no comment is available.
658      * @since 1.10.0
659      */
660     public String getTrailerComment() {
661         return trailerComment;
662     }
663 
664     /**
665      * Handle whether input is parsed as null
666      *
667      * @param input
668      *           the cell data to further processed
669      * @return null if input is parsed as null, or input itself if input isn't parsed as null
670      */
671     private String handleNull(final String input) {
672         final boolean isQuoted = this.reusableToken.isQuoted;
673         final String nullString = format.getNullString();
674         final boolean strictQuoteMode = isStrictQuoteMode();
675         if (input.equals(nullString)) {
676             // nullString = NULL(String), distinguish between "NULL" and NULL in ALL_NON_NULL or NON_NUMERIC quote mode
677             return strictQuoteMode && isQuoted ? input : null;
678         }
679         // don't set nullString, distinguish between "" and ,, (absent values) in All_NON_NULL or NON_NUMERIC quote mode
680         return strictQuoteMode && nullString == null && input.isEmpty() && !isQuoted ? null : input;
681     }
682 
683     /**
684      * Checks whether there is a header comment.
685      * The header comment appears before the header record.
686      * Note that if the parser's format has been given an explicit header
687      * (with {@link CSVFormat.Builder#setHeader(String... )} or another overload)
688      * and the header record is not being skipped
689      * ({@link CSVFormat.Builder#setSkipHeaderRecord} is false) then any initial comments
690      * will be associated with the first record, not the header.
691      *
692      * @return true if this parser has seen a header comment, false otherwise
693      * @since 1.10.0
694      */
695     public boolean hasHeaderComment() {
696         return headerComment != null;
697     }
698 
699     /**
700      * Checks whether there is a trailer comment.
701      * Trailer comments are located between the last record and EOF.
702      * The trailer comments will only be available after the parser has
703      * finished processing this stream.
704      *
705      * @return true if this parser has seen a trailer comment, false otherwise
706      * @since 1.10.0
707      */
708     public boolean hasTrailerComment() {
709         return trailerComment != null;
710     }
711 
712     /**
713      * Tests whether this parser is closed.
714      *
715      * @return whether this parser is closed.
716      */
717     public boolean isClosed() {
718         return this.lexer.isClosed();
719     }
720 
721     /**
722      * Tests whether the format's {@link QuoteMode} is {@link QuoteMode#ALL_NON_NULL} or {@link QuoteMode#NON_NUMERIC}.
723      *
724      * @return true if the format's {@link QuoteMode} is {@link QuoteMode#ALL_NON_NULL} or
725      *         {@link QuoteMode#NON_NUMERIC}.
726      */
727     private boolean isStrictQuoteMode() {
728         return this.format.getQuoteMode() == QuoteMode.ALL_NON_NULL ||
729                this.format.getQuoteMode() == QuoteMode.NON_NUMERIC;
730     }
731 
732     /**
733      * Returns the record iterator.
734      *
735      * <p>
736      * An {@link IOException} caught during the iteration are re-thrown as an
737      * {@link IllegalStateException}.
738      * </p>
739      * <p>
740      * If the parser is closed, the iterator will not yield any more records.
741      * A call to {@link Iterator#hasNext()} will return {@code false} and
742      * a call to {@link Iterator#next()} will throw a
743      * {@link NoSuchElementException}.
744      * </p>
745      * <p>
746      * If it is necessary to construct an iterator which is usable after the
747      * parser is closed, one option is to extract all records as a list with
748      * {@link #getRecords()}, and return an iterator to that list.
749      * </p>
750      */
751     @Override
752     public Iterator<CSVRecord> iterator() {
753         return csvRecordIterator;
754     }
755 
756     /**
757      * Parses the next record from the current point in the stream.
758      *
759      * @return the record as an array of values, or {@code null} if the end of the stream has been reached
760      * @throws IOException
761      *             on parse error or input read-failure
762      */
763     CSVRecord nextRecord() throws IOException {
764         CSVRecord result = null;
765         this.recordList.clear();
766         StringBuilder sb = null;
767         final long startCharPosition = lexer.getCharacterPosition() + this.characterOffset;
768         do {
769             this.reusableToken.reset();
770             this.lexer.nextToken(this.reusableToken);
771             switch (this.reusableToken.type) {
772             case TOKEN:
773                 this.addRecordValue(false);
774                 break;
775             case EORECORD:
776                 this.addRecordValue(true);
777                 break;
778             case EOF:
779                 if (this.reusableToken.isReady) {
780                     this.addRecordValue(true);
781                 } else if (sb != null) {
782                     trailerComment = sb.toString();
783                 }
784                 break;
785             case INVALID:
786                 throw new IOException("(line " + this.getCurrentLineNumber() + ") invalid parse sequence");
787             case COMMENT: // Ignored currently
788                 if (sb == null) { // first comment for this record
789                     sb = new StringBuilder();
790                 } else {
791                     sb.append(Constants.LF);
792                 }
793                 sb.append(this.reusableToken.content);
794                 this.reusableToken.type = TOKEN; // Read another token
795                 break;
796             default:
797                 throw new IllegalStateException("Unexpected Token type: " + this.reusableToken.type);
798             }
799         } while (this.reusableToken.type == TOKEN);
800 
801         if (!this.recordList.isEmpty()) {
802             this.recordNumber++;
803             final String comment = sb == null ? null : sb.toString();
804             result = new CSVRecord(this, this.recordList.toArray(Constants.EMPTY_STRING_ARRAY), comment,
805                 this.recordNumber, startCharPosition);
806         }
807         return result;
808     }
809 
810     /**
811      * Returns a sequential {@code Stream} with this collection as its source.
812      * <p>
813      * If the parser is closed, the stream will not produce any more values.
814      * See the comments in {@link #iterator()}.
815      * </p>
816      * @return a sequential {@code Stream} with this collection as its source.
817      * @since 1.9.0
818      */
819     public Stream<CSVRecord> stream() {
820         return StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator(), Spliterator.ORDERED), false);
821     }
822 
823 }