001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.csv;
019
020import static org.apache.commons.csv.Token.Type.TOKEN;
021
022import java.io.Closeable;
023import java.io.File;
024import java.io.IOException;
025import java.io.InputStream;
026import java.io.InputStreamReader;
027import java.io.Reader;
028import java.io.StringReader;
029import java.io.UncheckedIOException;
030import java.net.URL;
031import java.nio.charset.Charset;
032import java.nio.file.Files;
033import java.nio.file.Path;
034import java.util.ArrayList;
035import java.util.Arrays;
036import java.util.Collections;
037import java.util.Iterator;
038import java.util.LinkedHashMap;
039import java.util.List;
040import java.util.Map;
041import java.util.NoSuchElementException;
042import java.util.Objects;
043import java.util.Spliterator;
044import java.util.Spliterators;
045import java.util.TreeMap;
046import java.util.stream.Collectors;
047import java.util.stream.Stream;
048import java.util.stream.StreamSupport;
049
050/**
051 * Parses CSV files according to the specified format.
052 *
053 * Because CSV appears in many different dialects, the parser supports many formats by allowing the
054 * specification of a {@link CSVFormat}.
055 *
056 * The parser works record wise. It is not possible to go back, once a record has been parsed from the input stream.
057 *
058 * <h2>Creating instances</h2>
059 * <p>
060 * There are several static factory methods that can be used to create instances for various types of resources:
061 * </p>
062 * <ul>
063 *     <li>{@link #parse(java.io.File, Charset, CSVFormat)}</li>
064 *     <li>{@link #parse(String, CSVFormat)}</li>
065 *     <li>{@link #parse(java.net.URL, java.nio.charset.Charset, CSVFormat)}</li>
066 * </ul>
067 * <p>
 * Alternatively parsers can also be created by passing a {@link Reader} directly to one of the constructors.
069 *
070 * For those who like fluent APIs, parsers can be created using {@link CSVFormat#parse(java.io.Reader)} as a shortcut:
071 * </p>
072 * <pre>
073 * for(CSVRecord record : CSVFormat.EXCEL.parse(in)) {
074 *     ...
075 * }
076 * </pre>
077 *
078 * <h2>Parsing record wise</h2>
079 * <p>
080 * To parse a CSV input from a file, you write:
081 * </p>
082 *
083 * <pre>
084 * File csvData = new File(&quot;/path/to/csv&quot;);
 * CSVParser parser = CSVParser.parse(csvData, StandardCharsets.UTF_8, CSVFormat.RFC4180);
086 * for (CSVRecord csvRecord : parser) {
087 *     ...
088 * }
089 * </pre>
090 *
091 * <p>
 * This will read and parse the contents of the file using the
093 * <a href="http://tools.ietf.org/html/rfc4180" target="_blank">RFC 4180</a> format.
094 * </p>
095 *
096 * <p>
097 * To parse CSV input in a format like Excel, you write:
098 * </p>
099 *
100 * <pre>
 * CSVParser parser = CSVParser.parse(csvData, StandardCharsets.UTF_8, CSVFormat.EXCEL);
102 * for (CSVRecord csvRecord : parser) {
103 *     ...
104 * }
105 * </pre>
106 *
107 * <p>
 * If the predefined formats don't match the format at hand, custom formats can be defined. More information about
109 * customising CSVFormats is available in {@link CSVFormat CSVFormat Javadoc}.
110 * </p>
111 *
112 * <h2>Parsing into memory</h2>
113 * <p>
114 * If parsing record wise is not desired, the contents of the input can be read completely into memory.
115 * </p>
116 *
117 * <pre>
118 * Reader in = new StringReader(&quot;a;b\nc;d&quot;);
119 * CSVParser parser = new CSVParser(in, CSVFormat.EXCEL);
120 * List&lt;CSVRecord&gt; list = parser.getRecords();
121 * </pre>
122 *
123 * <p>
124 * There are two constraints that have to be kept in mind:
125 * </p>
126 *
127 * <ol>
128 *     <li>Parsing into memory starts at the current position of the parser. If you have already parsed records from
129 *     the input, those records will not end up in the in memory representation of your CSV data.</li>
130 *     <li>Parsing into memory may consume a lot of system resources depending on the input. For example if you're
131 *     parsing a 150MB file of CSV data the contents will be read completely into memory.</li>
132 * </ol>
133 *
134 * <h2>Notes</h2>
135 * <p>
136 * Internal parser state is completely covered by the format and the reader-state.
137 * </p>
138 *
139 * @see <a href="package-summary.html">package documentation for more details</a>
140 */
141public final class CSVParser implements Iterable<CSVRecord>, Closeable {
142
143    class CSVRecordIterator implements Iterator<CSVRecord> {
144        private CSVRecord current;
145
146        private CSVRecord getNextRecord() {
147            try {
148                return CSVParser.this.nextRecord();
149            } catch (final IOException e) {
150                throw new UncheckedIOException(e.getClass().getSimpleName() + " reading next record: " + e.toString(), e);
151            }
152        }
153
154        @Override
155        public boolean hasNext() {
156            if (CSVParser.this.isClosed()) {
157                return false;
158            }
159            if (this.current == null) {
160                this.current = this.getNextRecord();
161            }
162
163            return this.current != null;
164        }
165
166        @Override
167        public CSVRecord next() {
168            if (CSVParser.this.isClosed()) {
169                throw new NoSuchElementException("CSVParser has been closed");
170            }
171            CSVRecord next = this.current;
172            this.current = null;
173
174            if (next == null) {
175                // hasNext() wasn't called before
176                next = this.getNextRecord();
177                if (next == null) {
178                    throw new NoSuchElementException("No more CSV records available");
179                }
180            }
181
182            return next;
183        }
184
185        @Override
186        public void remove() {
187            throw new UnsupportedOperationException();
188        }
189    }
190
191    /**
192     * Header information based on name and position.
193     */
194    private static final class Headers {
195
196        /**
197         * Header column positions (0-based)
198         */
199        final Map<String, Integer> headerMap;
200
201        /**
202         * Header names in column order
203         */
204        final List<String> headerNames;
205
206        Headers(final Map<String, Integer> headerMap, final List<String> headerNames) {
207            this.headerMap = headerMap;
208            this.headerNames = headerNames;
209        }
210    }
211
212    /**
213     * Creates a parser for the given {@link File}.
214     *
215     * @param file
216     *            a CSV file. Must not be null.
217     * @param charset
218     *            The Charset to decode the given file.
219     * @param format
220     *            the CSVFormat used for CSV parsing. Must not be null.
221     * @return a new parser
222     * @throws IllegalArgumentException
223     *             If the parameters of the format are inconsistent or if either file or format are null.
224     * @throws IOException
225     *             If an I/O error occurs
226     */
227    public static CSVParser parse(final File file, final Charset charset, final CSVFormat format) throws IOException {
228        Objects.requireNonNull(file, "file");
229        return parse(file.toPath(), charset, format);
230    }
231
232    /**
233     * Creates a CSV parser using the given {@link CSVFormat}.
234     *
235     * <p>
236     * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser,
237     * unless you close the {@code reader}.
238     * </p>
239     *
240     * @param inputStream
241     *            an InputStream containing CSV-formatted input. Must not be null.
242     * @param charset
243     *            The Charset to decode the given file.
244     * @param format
245     *            the CSVFormat used for CSV parsing. Must not be null.
246     * @return a new CSVParser configured with the given reader and format.
247     * @throws IllegalArgumentException
248     *             If the parameters of the format are inconsistent or if either reader or format are null.
249     * @throws IOException
250     *             If there is a problem reading the header or skipping the first record
251     * @since 1.5
252     */
253    @SuppressWarnings("resource")
254    public static CSVParser parse(final InputStream inputStream, final Charset charset, final CSVFormat format)
255            throws IOException {
256        Objects.requireNonNull(inputStream, "inputStream");
257        Objects.requireNonNull(format, "format");
258        return parse(new InputStreamReader(inputStream, charset), format);
259    }
260
261    /**
262     * Creates and returns a parser for the given {@link Path}, which the caller MUST close.
263     *
264     * @param path
265     *            a CSV file. Must not be null.
266     * @param charset
267     *            The Charset to decode the given file.
268     * @param format
269     *            the CSVFormat used for CSV parsing. Must not be null.
270     * @return a new parser
271     * @throws IllegalArgumentException
272     *             If the parameters of the format are inconsistent or if either file or format are null.
273     * @throws IOException
274     *             If an I/O error occurs
275     * @since 1.5
276     */
277    @SuppressWarnings("resource")
278    public static CSVParser parse(final Path path, final Charset charset, final CSVFormat format) throws IOException {
279        Objects.requireNonNull(path, "path");
280        Objects.requireNonNull(format, "format");
281        return parse(Files.newInputStream(path), charset, format);
282    }
283
284    /**
285     * Creates a CSV parser using the given {@link CSVFormat}
286     *
287     * <p>
288     * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser,
289     * unless you close the {@code reader}.
290     * </p>
291     *
292     * @param reader
293     *            a Reader containing CSV-formatted input. Must not be null.
294     * @param format
295     *            the CSVFormat used for CSV parsing. Must not be null.
296     * @return a new CSVParser configured with the given reader and format.
297     * @throws IllegalArgumentException
298     *             If the parameters of the format are inconsistent or if either reader or format are null.
299     * @throws IOException
300     *             If there is a problem reading the header or skipping the first record
301     * @since 1.5
302     */
303    public static CSVParser parse(final Reader reader, final CSVFormat format) throws IOException {
304        return new CSVParser(reader, format);
305    }
306
    // Note: the objects shared to reduce garbage (recordList, reusableToken) are instance fields declared further below.
308
309    /**
310     * Creates a parser for the given {@link String}.
311     *
312     * @param string
313     *            a CSV string. Must not be null.
314     * @param format
315     *            the CSVFormat used for CSV parsing. Must not be null.
316     * @return a new parser
317     * @throws IllegalArgumentException
318     *             If the parameters of the format are inconsistent or if either string or format are null.
319     * @throws IOException
320     *             If an I/O error occurs
321     */
322    public static CSVParser parse(final String string, final CSVFormat format) throws IOException {
323        Objects.requireNonNull(string, "string");
324        Objects.requireNonNull(format, "format");
325
326        return new CSVParser(new StringReader(string), format);
327    }
328
329    /**
330     * Creates and returns a parser for the given URL, which the caller MUST close.
331     *
332     * <p>
333     * If you do not read all records from the given {@code url}, you should call {@link #close()} on the parser, unless
334     * you close the {@code url}.
335     * </p>
336     *
337     * @param url
338     *            a URL. Must not be null.
339     * @param charset
340     *            the charset for the resource. Must not be null.
341     * @param format
342     *            the CSVFormat used for CSV parsing. Must not be null.
343     * @return a new parser
344     * @throws IllegalArgumentException
345     *             If the parameters of the format are inconsistent or if either url, charset or format are null.
346     * @throws IOException
347     *             If an I/O error occurs
348     */
349    @SuppressWarnings("resource")
350    public static CSVParser parse(final URL url, final Charset charset, final CSVFormat format) throws IOException {
351        Objects.requireNonNull(url, "url");
352        Objects.requireNonNull(charset, "charset");
353        Objects.requireNonNull(format, "format");
354
355        return new CSVParser(new InputStreamReader(url.openStream(), charset), format);
356    }
357
358    private String headerComment;
359
360    private String trailerComment;
361
362    private final CSVFormat format;
363
364    private final Headers headers;
365
366    private final Lexer lexer;
367
368    private final CSVRecordIterator csvRecordIterator;
369
370    /** A record buffer for getRecord(). Grows as necessary and is reused. */
371    private final List<String> recordList = new ArrayList<>();
372
373    /**
374     * The next record number to assign.
375     */
376    private long recordNumber;
377
378    /**
379     * Lexer offset when the parser does not start parsing at the beginning of the source. Usually used in combination
380     * with {@link #recordNumber}.
381     */
382    private final long characterOffset;
383
384    private final Token reusableToken = new Token();
385
386    /**
387     * Constructs a new instance using the given {@link CSVFormat}
388     *
389     * <p>
390     * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser,
391     * unless you close the {@code reader}.
392     * </p>
393     *
394     * @param reader
395     *            a Reader containing CSV-formatted input. Must not be null.
396     * @param format
397     *            the CSVFormat used for CSV parsing. Must not be null.
398     * @throws IllegalArgumentException
399     *             If the parameters of the format are inconsistent or if either reader or format are null.
400     * @throws IOException
401     *             If there is a problem reading the header or skipping the first record
402     */
403    public CSVParser(final Reader reader, final CSVFormat format) throws IOException {
404        this(reader, format, 0, 1);
405    }
406
407    /**
408     * Constructs a new instance using the given {@link CSVFormat}
409     *
410     * <p>
411     * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser,
412     * unless you close the {@code reader}.
413     * </p>
414     *
415     * @param reader
416     *            a Reader containing CSV-formatted input. Must not be null.
417     * @param format
418     *            the CSVFormat used for CSV parsing. Must not be null.
419     * @param characterOffset
420     *            Lexer offset when the parser does not start parsing at the beginning of the source.
421     * @param recordNumber
422     *            The next record number to assign
423     * @throws IllegalArgumentException
424     *             If the parameters of the format are inconsistent or if either reader or format are null.
425     * @throws IOException
426     *             If there is a problem reading the header or skipping the first record
427     * @since 1.1
428     */
429    @SuppressWarnings("resource")
430    public CSVParser(final Reader reader, final CSVFormat format, final long characterOffset, final long recordNumber)
431        throws IOException {
432        Objects.requireNonNull(reader, "reader");
433        Objects.requireNonNull(format, "format");
434
435        this.format = format.copy();
436        this.lexer = new Lexer(format, new ExtendedBufferedReader(reader));
437        this.csvRecordIterator = new CSVRecordIterator();
438        this.headers = createHeaders();
439        this.characterOffset = characterOffset;
440        this.recordNumber = recordNumber - 1;
441    }
442
443    private void addRecordValue(final boolean lastRecord) {
444        final String input = this.format.trim(this.reusableToken.content.toString());
445        if (lastRecord && input.isEmpty() && this.format.getTrailingDelimiter()) {
446            return;
447        }
448        this.recordList.add(handleNull(input));
449    }
450
451    /**
452     * Closes resources.
453     *
454     * @throws IOException
455     *             If an I/O error occurs
456     */
457    @Override
458    public void close() throws IOException {
459        if (this.lexer != null) {
460            this.lexer.close();
461        }
462    }
463
464    private Map<String, Integer> createEmptyHeaderMap() {
465        return this.format.getIgnoreHeaderCase() ?
466                new TreeMap<>(String.CASE_INSENSITIVE_ORDER) :
467                new LinkedHashMap<>();
468    }
469
470    /**
471     * Creates the name to index mapping if the format defines a header.
472     *
473     * @return null if the format has no header.
474     * @throws IOException if there is a problem reading the header or skipping the first record
475     */
476    private Headers createHeaders() throws IOException {
477        Map<String, Integer> hdrMap = null;
478        List<String> headerNames = null;
479        final String[] formatHeader = this.format.getHeader();
480        if (formatHeader != null) {
481            hdrMap = createEmptyHeaderMap();
482            String[] headerRecord = null;
483            if (formatHeader.length == 0) {
484                // read the header from the first line of the file
485                final CSVRecord nextRecord = this.nextRecord();
486                if (nextRecord != null) {
487                    headerRecord = nextRecord.values();
488                    headerComment = nextRecord.getComment();
489                }
490            } else {
491                if (this.format.getSkipHeaderRecord()) {
492                    final CSVRecord nextRecord = this.nextRecord();
493                    if (nextRecord != null) {
494                        headerComment = nextRecord.getComment();
495                    }
496                }
497                headerRecord = formatHeader;
498            }
499
500            // build the name to index mappings
501            if (headerRecord != null) {
502                // Track an occurrence of a null, empty or blank header.
503                boolean observedMissing = false;
504                for (int i = 0; i < headerRecord.length; i++) {
505                    final String header = headerRecord[i];
506                    final boolean blankHeader = CSVFormat.isBlank(header);
507                    if (blankHeader && !this.format.getAllowMissingColumnNames()) {
508                        throw new IllegalArgumentException(
509                            "A header name is missing in " + Arrays.toString(headerRecord));
510                    }
511
512                    final boolean containsHeader = blankHeader ? observedMissing : hdrMap.containsKey(header);
513                    final DuplicateHeaderMode headerMode = this.format.getDuplicateHeaderMode();
514                    final boolean duplicatesAllowed = headerMode == DuplicateHeaderMode.ALLOW_ALL;
515                    final boolean emptyDuplicatesAllowed = headerMode == DuplicateHeaderMode.ALLOW_EMPTY;
516
517                    if (containsHeader && !duplicatesAllowed && !(blankHeader && emptyDuplicatesAllowed)) {
518                        throw new IllegalArgumentException(
519                            String.format(
520                                "The header contains a duplicate name: \"%s\" in %s. If this is valid then use CSVFormat.Builder.setDuplicateHeaderMode().",
521                                header, Arrays.toString(headerRecord)));
522                    }
523                    observedMissing |= blankHeader;
524                    if (header != null) {
525                        hdrMap.put(header, Integer.valueOf(i));
526                        if (headerNames == null) {
527                            headerNames = new ArrayList<>(headerRecord.length);
528                        }
529                        headerNames.add(header);
530                    }
531                }
532            }
533        }
534        if (headerNames == null) {
535            headerNames = Collections.emptyList(); // immutable
536        } else {
537            headerNames = Collections.unmodifiableList(headerNames);
538        }
539        return new Headers(hdrMap, headerNames);
540    }
541
542    /**
543     * Gets the current line number in the input stream.
544     *
545     * <p>
546     * <strong>ATTENTION:</strong> If your CSV input has multi-line values, the returned number does not correspond to
547     * the record number.
548     * </p>
549     *
550     * @return current line number
551     */
552    public long getCurrentLineNumber() {
553        return this.lexer.getCurrentLineNumber();
554    }
555
556    /**
557     * Gets the first end-of-line string encountered.
558     *
559     * @return the first end-of-line string
560     * @since 1.5
561     */
562    public String getFirstEndOfLine() {
563        return lexer.getFirstEol();
564    }
565
566    /**
567     * Gets the header comment, if any.
568     * The header comment appears before the header record.
569     *
570     * @return the header comment for this stream, or null if no comment is available.
571     * @since 1.10.0
572     */
573    public String getHeaderComment() {
574        return headerComment;
575    }
576
577    /**
578     * Gets a copy of the header map as defined in the CSVFormat's header.
579     * <p>
580     * The map keys are column names. The map values are 0-based indices.
581     * </p>
582     * <p>
583     * Note: The map can only provide a one-to-one mapping when the format did not
584     * contain null or duplicate column names.
585     * </p>
586     *
587     * @return a copy of the header map.
588     */
589    public Map<String, Integer> getHeaderMap() {
590        if (this.headers.headerMap == null) {
591            return null;
592        }
593        final Map<String, Integer> map = createEmptyHeaderMap();
594        map.putAll(this.headers.headerMap);
595        return map;
596    }
597
598    /**
599     * Gets the underlying header map.
600     *
601     * @return the underlying header map.
602     */
603    Map<String, Integer> getHeaderMapRaw() {
604        return this.headers.headerMap;
605    }
606
607    /**
608     * Gets a read-only list of header names that iterates in column order as defined in the CSVFormat's header.
609     * <p>
610     * Note: The list provides strings that can be used as keys in the header map.
611     * The list will not contain null column names if they were present in the input
612     * format.
613     * </p>
614     *
615     * @return read-only list of header names that iterates in column order.
616     * @see #getHeaderMap()
617     * @since 1.7
618     */
619    public List<String> getHeaderNames() {
620        return Collections.unmodifiableList(headers.headerNames);
621    }
622
623    /**
624     * Gets the current record number in the input stream.
625     *
626     * <p>
627     * <strong>ATTENTION:</strong> If your CSV input has multi-line values, the returned number does not correspond to
628     * the line number.
629     * </p>
630     *
631     * @return current record number
632     */
633    public long getRecordNumber() {
634        return this.recordNumber;
635    }
636
637    /**
638     * Parses the CSV input according to the given format and returns the content as a list of
639     * {@link CSVRecord CSVRecords}.
640     *
641     * <p>
642     * The returned content starts at the current parse-position in the stream.
643     * </p>
644     *
645     * @return list of {@link CSVRecord CSVRecords}, may be empty
646     * @throws UncheckedIOException
647     *             on parse error or input read-failure
648     */
649    public List<CSVRecord> getRecords() {
650        return stream().collect(Collectors.toList());
651    }
652
653    /**
654     * Gets the trailer comment, if any.
655     * Trailer comments are located between the last record and EOF
656     *
657     * @return the trailer comment for this stream, or null if no comment is available.
658     * @since 1.10.0
659     */
660    public String getTrailerComment() {
661        return trailerComment;
662    }
663
664    /**
665     * Handle whether input is parsed as null
666     *
667     * @param input
668     *           the cell data to further processed
669     * @return null if input is parsed as null, or input itself if input isn't parsed as null
670     */
671    private String handleNull(final String input) {
672        final boolean isQuoted = this.reusableToken.isQuoted;
673        final String nullString = format.getNullString();
674        final boolean strictQuoteMode = isStrictQuoteMode();
675        if (input.equals(nullString)) {
676            // nullString = NULL(String), distinguish between "NULL" and NULL in ALL_NON_NULL or NON_NUMERIC quote mode
677            return strictQuoteMode && isQuoted ? input : null;
678        }
679        // don't set nullString, distinguish between "" and ,, (absent values) in All_NON_NULL or NON_NUMERIC quote mode
680        return strictQuoteMode && nullString == null && input.isEmpty() && !isQuoted ? null : input;
681    }
682
683    /**
684     * Checks whether there is a header comment.
685     * The header comment appears before the header record.
686     * Note that if the parser's format has been given an explicit header
687     * (with {@link CSVFormat.Builder#setHeader(String... )} or another overload)
688     * and the header record is not being skipped
689     * ({@link CSVFormat.Builder#setSkipHeaderRecord} is false) then any initial comments
690     * will be associated with the first record, not the header.
691     *
692     * @return true if this parser has seen a header comment, false otherwise
693     * @since 1.10.0
694     */
695    public boolean hasHeaderComment() {
696        return headerComment != null;
697    }
698
699    /**
700     * Checks whether there is a trailer comment.
701     * Trailer comments are located between the last record and EOF.
702     * The trailer comments will only be available after the parser has
703     * finished processing this stream.
704     *
705     * @return true if this parser has seen a trailer comment, false otherwise
706     * @since 1.10.0
707     */
708    public boolean hasTrailerComment() {
709        return trailerComment != null;
710    }
711
712    /**
713     * Tests whether this parser is closed.
714     *
715     * @return whether this parser is closed.
716     */
717    public boolean isClosed() {
718        return this.lexer.isClosed();
719    }
720
721    /**
722     * Tests whether the format's {@link QuoteMode} is {@link QuoteMode#ALL_NON_NULL} or {@link QuoteMode#NON_NUMERIC}.
723     *
724     * @return true if the format's {@link QuoteMode} is {@link QuoteMode#ALL_NON_NULL} or
725     *         {@link QuoteMode#NON_NUMERIC}.
726     */
727    private boolean isStrictQuoteMode() {
728        return this.format.getQuoteMode() == QuoteMode.ALL_NON_NULL ||
729               this.format.getQuoteMode() == QuoteMode.NON_NUMERIC;
730    }
731
732    /**
733     * Returns the record iterator.
734     *
735     * <p>
736     * An {@link IOException} caught during the iteration are re-thrown as an
737     * {@link IllegalStateException}.
738     * </p>
739     * <p>
740     * If the parser is closed, the iterator will not yield any more records.
741     * A call to {@link Iterator#hasNext()} will return {@code false} and
742     * a call to {@link Iterator#next()} will throw a
743     * {@link NoSuchElementException}.
744     * </p>
745     * <p>
746     * If it is necessary to construct an iterator which is usable after the
747     * parser is closed, one option is to extract all records as a list with
748     * {@link #getRecords()}, and return an iterator to that list.
749     * </p>
750     */
751    @Override
752    public Iterator<CSVRecord> iterator() {
753        return csvRecordIterator;
754    }
755
756    /**
757     * Parses the next record from the current point in the stream.
758     *
759     * @return the record as an array of values, or {@code null} if the end of the stream has been reached
760     * @throws IOException
761     *             on parse error or input read-failure
762     */
763    CSVRecord nextRecord() throws IOException {
764        CSVRecord result = null;
765        this.recordList.clear();
766        StringBuilder sb = null;
767        final long startCharPosition = lexer.getCharacterPosition() + this.characterOffset;
768        do {
769            this.reusableToken.reset();
770            this.lexer.nextToken(this.reusableToken);
771            switch (this.reusableToken.type) {
772            case TOKEN:
773                this.addRecordValue(false);
774                break;
775            case EORECORD:
776                this.addRecordValue(true);
777                break;
778            case EOF:
779                if (this.reusableToken.isReady) {
780                    this.addRecordValue(true);
781                } else if (sb != null) {
782                    trailerComment = sb.toString();
783                }
784                break;
785            case INVALID:
786                throw new IOException("(line " + this.getCurrentLineNumber() + ") invalid parse sequence");
787            case COMMENT: // Ignored currently
788                if (sb == null) { // first comment for this record
789                    sb = new StringBuilder();
790                } else {
791                    sb.append(Constants.LF);
792                }
793                sb.append(this.reusableToken.content);
794                this.reusableToken.type = TOKEN; // Read another token
795                break;
796            default:
797                throw new IllegalStateException("Unexpected Token type: " + this.reusableToken.type);
798            }
799        } while (this.reusableToken.type == TOKEN);
800
801        if (!this.recordList.isEmpty()) {
802            this.recordNumber++;
803            final String comment = sb == null ? null : sb.toString();
804            result = new CSVRecord(this, this.recordList.toArray(Constants.EMPTY_STRING_ARRAY), comment,
805                this.recordNumber, startCharPosition);
806        }
807        return result;
808    }
809
810    /**
811     * Returns a sequential {@code Stream} with this collection as its source.
812     * <p>
813     * If the parser is closed, the stream will not produce any more values.
814     * See the comments in {@link #iterator()}.
815     * </p>
816     * @return a sequential {@code Stream} with this collection as its source.
817     * @since 1.9.0
818     */
819    public Stream<CSVRecord> stream() {
820        return StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator(), Spliterator.ORDERED), false);
821    }
822
823}