001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018package org.apache.commons.csv; 019 020import static org.apache.commons.csv.Token.Type.TOKEN; 021 022import java.io.Closeable; 023import java.io.File; 024import java.io.IOException; 025import java.io.InputStream; 026import java.io.InputStreamReader; 027import java.io.Reader; 028import java.io.StringReader; 029import java.io.UncheckedIOException; 030import java.net.URL; 031import java.nio.charset.Charset; 032import java.nio.file.Files; 033import java.nio.file.Path; 034import java.util.ArrayList; 035import java.util.Arrays; 036import java.util.Collections; 037import java.util.Iterator; 038import java.util.LinkedHashMap; 039import java.util.List; 040import java.util.Map; 041import java.util.NoSuchElementException; 042import java.util.Objects; 043import java.util.Spliterator; 044import java.util.Spliterators; 045import java.util.TreeMap; 046import java.util.stream.Collectors; 047import java.util.stream.Stream; 048import java.util.stream.StreamSupport; 049 050/** 051 * Parses CSV files according to the specified format. 052 * 053 * Because CSV appears in many different dialects, the parser supports many formats by allowing the 054 * specification of a {@link CSVFormat}. 055 * 056 * The parser works record wise. It is not possible to go back, once a record has been parsed from the input stream. 057 * 058 * <h2>Creating instances</h2> 059 * <p> 060 * There are several static factory methods that can be used to create instances for various types of resources: 061 * </p> 062 * <ul> 063 * <li>{@link #parse(java.io.File, Charset, CSVFormat)}</li> 064 * <li>{@link #parse(String, CSVFormat)}</li> 065 * <li>{@link #parse(java.net.URL, java.nio.charset.Charset, CSVFormat)}</li> 066 * </ul> 067 * <p> 068 * Alternatively parsers can also be created by passing a {@link Reader} directly to the sole constructor. 069 * 070 * For those who like fluent APIs, parsers can be created using {@link CSVFormat#parse(java.io.Reader)} as a shortcut: 071 * </p> 072 * <pre> 073 * for(CSVRecord record : CSVFormat.EXCEL.parse(in)) { 074 * ... 075 * } 076 * </pre> 077 * 078 * <h2>Parsing record wise</h2> 079 * <p> 080 * To parse a CSV input from a file, you write: 081 * </p> 082 * 083 * <pre> 084 * File csvData = new File("/path/to/csv"); 085 * CSVParser parser = CSVParser.parse(csvData, CSVFormat.RFC4180); 086 * for (CSVRecord csvRecord : parser) { 087 * ... 088 * } 089 * </pre> 090 * 091 * <p> 092 * This will read the parse the contents of the file using the 093 * <a href="http://tools.ietf.org/html/rfc4180" target="_blank">RFC 4180</a> format. 094 * </p> 095 * 096 * <p> 097 * To parse CSV input in a format like Excel, you write: 098 * </p> 099 * 100 * <pre> 101 * CSVParser parser = CSVParser.parse(csvData, CSVFormat.EXCEL); 102 * for (CSVRecord csvRecord : parser) { 103 * ... 104 * } 105 * </pre> 106 * 107 * <p> 108 * If the predefined formats don't match the format at hands, custom formats can be defined. More information about 109 * customising CSVFormats is available in {@link CSVFormat CSVFormat Javadoc}. 110 * </p> 111 * 112 * <h2>Parsing into memory</h2> 113 * <p> 114 * If parsing record wise is not desired, the contents of the input can be read completely into memory. 115 * </p> 116 * 117 * <pre> 118 * Reader in = new StringReader("a;b\nc;d"); 119 * CSVParser parser = new CSVParser(in, CSVFormat.EXCEL); 120 * List<CSVRecord> list = parser.getRecords(); 121 * </pre> 122 * 123 * <p> 124 * There are two constraints that have to be kept in mind: 125 * </p> 126 * 127 * <ol> 128 * <li>Parsing into memory starts at the current position of the parser. If you have already parsed records from 129 * the input, those records will not end up in the in memory representation of your CSV data.</li> 130 * <li>Parsing into memory may consume a lot of system resources depending on the input. For example if you're 131 * parsing a 150MB file of CSV data the contents will be read completely into memory.</li> 132 * </ol> 133 * 134 * <h2>Notes</h2> 135 * <p> 136 * Internal parser state is completely covered by the format and the reader-state. 137 * </p> 138 * 139 * @see <a href="package-summary.html">package documentation for more details</a> 140 */ 141public final class CSVParser implements Iterable<CSVRecord>, Closeable { 142 143 class CSVRecordIterator implements Iterator<CSVRecord> { 144 private CSVRecord current; 145 146 private CSVRecord getNextRecord() { 147 try { 148 return CSVParser.this.nextRecord(); 149 } catch (final IOException e) { 150 throw new UncheckedIOException(e.getClass().getSimpleName() + " reading next record: " + e.toString(), e); 151 } 152 } 153 154 @Override 155 public boolean hasNext() { 156 if (CSVParser.this.isClosed()) { 157 return false; 158 } 159 if (this.current == null) { 160 this.current = this.getNextRecord(); 161 } 162 163 return this.current != null; 164 } 165 166 @Override 167 public CSVRecord next() { 168 if (CSVParser.this.isClosed()) { 169 throw new NoSuchElementException("CSVParser has been closed"); 170 } 171 CSVRecord next = this.current; 172 this.current = null; 173 174 if (next == null) { 175 // hasNext() wasn't called before 176 next = this.getNextRecord(); 177 if (next == null) { 178 throw new NoSuchElementException("No more CSV records available"); 179 } 180 } 181 182 return next; 183 } 184 185 @Override 186 public void remove() { 187 throw new UnsupportedOperationException(); 188 } 189 } 190 191 /** 192 * Header information based on name and position. 193 */ 194 private static final class Headers { 195 196 /** 197 * Header column positions (0-based) 198 */ 199 final Map<String, Integer> headerMap; 200 201 /** 202 * Header names in column order 203 */ 204 final List<String> headerNames; 205 206 Headers(final Map<String, Integer> headerMap, final List<String> headerNames) { 207 this.headerMap = headerMap; 208 this.headerNames = headerNames; 209 } 210 } 211 212 /** 213 * Creates a parser for the given {@link File}. 214 * 215 * @param file 216 * a CSV file. Must not be null. 217 * @param charset 218 * The Charset to decode the given file. 219 * @param format 220 * the CSVFormat used for CSV parsing. Must not be null. 221 * @return a new parser 222 * @throws IllegalArgumentException 223 * If the parameters of the format are inconsistent or if either file or format are null. 224 * @throws IOException 225 * If an I/O error occurs 226 */ 227 public static CSVParser parse(final File file, final Charset charset, final CSVFormat format) throws IOException { 228 Objects.requireNonNull(file, "file"); 229 return parse(file.toPath(), charset, format); 230 } 231 232 /** 233 * Creates a CSV parser using the given {@link CSVFormat}. 234 * 235 * <p> 236 * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser, 237 * unless you close the {@code reader}. 238 * </p> 239 * 240 * @param inputStream 241 * an InputStream containing CSV-formatted input. Must not be null. 242 * @param charset 243 * The Charset to decode the given file. 244 * @param format 245 * the CSVFormat used for CSV parsing. Must not be null. 246 * @return a new CSVParser configured with the given reader and format. 247 * @throws IllegalArgumentException 248 * If the parameters of the format are inconsistent or if either reader or format are null. 249 * @throws IOException 250 * If there is a problem reading the header or skipping the first record 251 * @since 1.5 252 */ 253 @SuppressWarnings("resource") 254 public static CSVParser parse(final InputStream inputStream, final Charset charset, final CSVFormat format) 255 throws IOException { 256 Objects.requireNonNull(inputStream, "inputStream"); 257 Objects.requireNonNull(format, "format"); 258 return parse(new InputStreamReader(inputStream, charset), format); 259 } 260 261 /** 262 * Creates and returns a parser for the given {@link Path}, which the caller MUST close. 263 * 264 * @param path 265 * a CSV file. Must not be null. 266 * @param charset 267 * The Charset to decode the given file. 268 * @param format 269 * the CSVFormat used for CSV parsing. Must not be null. 270 * @return a new parser 271 * @throws IllegalArgumentException 272 * If the parameters of the format are inconsistent or if either file or format are null. 273 * @throws IOException 274 * If an I/O error occurs 275 * @since 1.5 276 */ 277 @SuppressWarnings("resource") 278 public static CSVParser parse(final Path path, final Charset charset, final CSVFormat format) throws IOException { 279 Objects.requireNonNull(path, "path"); 280 Objects.requireNonNull(format, "format"); 281 return parse(Files.newInputStream(path), charset, format); 282 } 283 284 /** 285 * Creates a CSV parser using the given {@link CSVFormat} 286 * 287 * <p> 288 * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser, 289 * unless you close the {@code reader}. 290 * </p> 291 * 292 * @param reader 293 * a Reader containing CSV-formatted input. Must not be null. 294 * @param format 295 * the CSVFormat used for CSV parsing. Must not be null. 296 * @return a new CSVParser configured with the given reader and format. 297 * @throws IllegalArgumentException 298 * If the parameters of the format are inconsistent or if either reader or format are null. 299 * @throws IOException 300 * If there is a problem reading the header or skipping the first record 301 * @since 1.5 302 */ 303 public static CSVParser parse(final Reader reader, final CSVFormat format) throws IOException { 304 return new CSVParser(reader, format); 305 } 306 307 // the following objects are shared to reduce garbage 308 309 /** 310 * Creates a parser for the given {@link String}. 311 * 312 * @param string 313 * a CSV string. Must not be null. 314 * @param format 315 * the CSVFormat used for CSV parsing. Must not be null. 316 * @return a new parser 317 * @throws IllegalArgumentException 318 * If the parameters of the format are inconsistent or if either string or format are null. 319 * @throws IOException 320 * If an I/O error occurs 321 */ 322 public static CSVParser parse(final String string, final CSVFormat format) throws IOException { 323 Objects.requireNonNull(string, "string"); 324 Objects.requireNonNull(format, "format"); 325 326 return new CSVParser(new StringReader(string), format); 327 } 328 329 /** 330 * Creates and returns a parser for the given URL, which the caller MUST close. 331 * 332 * <p> 333 * If you do not read all records from the given {@code url}, you should call {@link #close()} on the parser, unless 334 * you close the {@code url}. 335 * </p> 336 * 337 * @param url 338 * a URL. Must not be null. 339 * @param charset 340 * the charset for the resource. Must not be null. 341 * @param format 342 * the CSVFormat used for CSV parsing. Must not be null. 343 * @return a new parser 344 * @throws IllegalArgumentException 345 * If the parameters of the format are inconsistent or if either url, charset or format are null. 346 * @throws IOException 347 * If an I/O error occurs 348 */ 349 @SuppressWarnings("resource") 350 public static CSVParser parse(final URL url, final Charset charset, final CSVFormat format) throws IOException { 351 Objects.requireNonNull(url, "url"); 352 Objects.requireNonNull(charset, "charset"); 353 Objects.requireNonNull(format, "format"); 354 355 return new CSVParser(new InputStreamReader(url.openStream(), charset), format); 356 } 357 358 private String headerComment; 359 360 private String trailerComment; 361 362 private final CSVFormat format; 363 364 private final Headers headers; 365 366 private final Lexer lexer; 367 368 private final CSVRecordIterator csvRecordIterator; 369 370 /** A record buffer for getRecord(). Grows as necessary and is reused. */ 371 private final List<String> recordList = new ArrayList<>(); 372 373 /** 374 * The next record number to assign. 375 */ 376 private long recordNumber; 377 378 /** 379 * Lexer offset when the parser does not start parsing at the beginning of the source. Usually used in combination 380 * with {@link #recordNumber}. 381 */ 382 private final long characterOffset; 383 384 private final Token reusableToken = new Token(); 385 386 /** 387 * Constructs a new instance using the given {@link CSVFormat} 388 * 389 * <p> 390 * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser, 391 * unless you close the {@code reader}. 392 * </p> 393 * 394 * @param reader 395 * a Reader containing CSV-formatted input. Must not be null. 396 * @param format 397 * the CSVFormat used for CSV parsing. Must not be null. 398 * @throws IllegalArgumentException 399 * If the parameters of the format are inconsistent or if either reader or format are null. 400 * @throws IOException 401 * If there is a problem reading the header or skipping the first record 402 */ 403 public CSVParser(final Reader reader, final CSVFormat format) throws IOException { 404 this(reader, format, 0, 1); 405 } 406 407 /** 408 * Constructs a new instance using the given {@link CSVFormat} 409 * 410 * <p> 411 * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser, 412 * unless you close the {@code reader}. 413 * </p> 414 * 415 * @param reader 416 * a Reader containing CSV-formatted input. Must not be null. 417 * @param format 418 * the CSVFormat used for CSV parsing. Must not be null. 419 * @param characterOffset 420 * Lexer offset when the parser does not start parsing at the beginning of the source. 421 * @param recordNumber 422 * The next record number to assign 423 * @throws IllegalArgumentException 424 * If the parameters of the format are inconsistent or if either reader or format are null. 425 * @throws IOException 426 * If there is a problem reading the header or skipping the first record 427 * @since 1.1 428 */ 429 @SuppressWarnings("resource") 430 public CSVParser(final Reader reader, final CSVFormat format, final long characterOffset, final long recordNumber) 431 throws IOException { 432 Objects.requireNonNull(reader, "reader"); 433 Objects.requireNonNull(format, "format"); 434 435 this.format = format.copy(); 436 this.lexer = new Lexer(format, new ExtendedBufferedReader(reader)); 437 this.csvRecordIterator = new CSVRecordIterator(); 438 this.headers = createHeaders(); 439 this.characterOffset = characterOffset; 440 this.recordNumber = recordNumber - 1; 441 } 442 443 private void addRecordValue(final boolean lastRecord) { 444 final String input = this.format.trim(this.reusableToken.content.toString()); 445 if (lastRecord && input.isEmpty() && this.format.getTrailingDelimiter()) { 446 return; 447 } 448 this.recordList.add(handleNull(input)); 449 } 450 451 /** 452 * Closes resources. 453 * 454 * @throws IOException 455 * If an I/O error occurs 456 */ 457 @Override 458 public void close() throws IOException { 459 if (this.lexer != null) { 460 this.lexer.close(); 461 } 462 } 463 464 private Map<String, Integer> createEmptyHeaderMap() { 465 return this.format.getIgnoreHeaderCase() ? 466 new TreeMap<>(String.CASE_INSENSITIVE_ORDER) : 467 new LinkedHashMap<>(); 468 } 469 470 /** 471 * Creates the name to index mapping if the format defines a header. 472 * 473 * @return null if the format has no header. 474 * @throws IOException if there is a problem reading the header or skipping the first record 475 */ 476 private Headers createHeaders() throws IOException { 477 Map<String, Integer> hdrMap = null; 478 List<String> headerNames = null; 479 final String[] formatHeader = this.format.getHeader(); 480 if (formatHeader != null) { 481 hdrMap = createEmptyHeaderMap(); 482 String[] headerRecord = null; 483 if (formatHeader.length == 0) { 484 // read the header from the first line of the file 485 final CSVRecord nextRecord = this.nextRecord(); 486 if (nextRecord != null) { 487 headerRecord = nextRecord.values(); 488 headerComment = nextRecord.getComment(); 489 } 490 } else { 491 if (this.format.getSkipHeaderRecord()) { 492 final CSVRecord nextRecord = this.nextRecord(); 493 if (nextRecord != null) { 494 headerComment = nextRecord.getComment(); 495 } 496 } 497 headerRecord = formatHeader; 498 } 499 500 // build the name to index mappings 501 if (headerRecord != null) { 502 // Track an occurrence of a null, empty or blank header. 503 boolean observedMissing = false; 504 for (int i = 0; i < headerRecord.length; i++) { 505 final String header = headerRecord[i]; 506 final boolean blankHeader = CSVFormat.isBlank(header); 507 if (blankHeader && !this.format.getAllowMissingColumnNames()) { 508 throw new IllegalArgumentException( 509 "A header name is missing in " + Arrays.toString(headerRecord)); 510 } 511 512 final boolean containsHeader = blankHeader ? observedMissing : hdrMap.containsKey(header); 513 final DuplicateHeaderMode headerMode = this.format.getDuplicateHeaderMode(); 514 final boolean duplicatesAllowed = headerMode == DuplicateHeaderMode.ALLOW_ALL; 515 final boolean emptyDuplicatesAllowed = headerMode == DuplicateHeaderMode.ALLOW_EMPTY; 516 517 if (containsHeader && !duplicatesAllowed && !(blankHeader && emptyDuplicatesAllowed)) { 518 throw new IllegalArgumentException( 519 String.format( 520 "The header contains a duplicate name: \"%s\" in %s. If this is valid then use CSVFormat.Builder.setDuplicateHeaderMode().", 521 header, Arrays.toString(headerRecord))); 522 } 523 observedMissing |= blankHeader; 524 if (header != null) { 525 hdrMap.put(header, Integer.valueOf(i)); 526 if (headerNames == null) { 527 headerNames = new ArrayList<>(headerRecord.length); 528 } 529 headerNames.add(header); 530 } 531 } 532 } 533 } 534 if (headerNames == null) { 535 headerNames = Collections.emptyList(); // immutable 536 } else { 537 headerNames = Collections.unmodifiableList(headerNames); 538 } 539 return new Headers(hdrMap, headerNames); 540 } 541 542 /** 543 * Gets the current line number in the input stream. 544 * 545 * <p> 546 * <strong>ATTENTION:</strong> If your CSV input has multi-line values, the returned number does not correspond to 547 * the record number. 548 * </p> 549 * 550 * @return current line number 551 */ 552 public long getCurrentLineNumber() { 553 return this.lexer.getCurrentLineNumber(); 554 } 555 556 /** 557 * Gets the first end-of-line string encountered. 558 * 559 * @return the first end-of-line string 560 * @since 1.5 561 */ 562 public String getFirstEndOfLine() { 563 return lexer.getFirstEol(); 564 } 565 566 /** 567 * Gets the header comment, if any. 568 * The header comment appears before the header record. 569 * 570 * @return the header comment for this stream, or null if no comment is available. 571 * @since 1.10.0 572 */ 573 public String getHeaderComment() { 574 return headerComment; 575 } 576 577 /** 578 * Gets a copy of the header map as defined in the CSVFormat's header. 579 * <p> 580 * The map keys are column names. The map values are 0-based indices. 581 * </p> 582 * <p> 583 * Note: The map can only provide a one-to-one mapping when the format did not 584 * contain null or duplicate column names. 585 * </p> 586 * 587 * @return a copy of the header map. 588 */ 589 public Map<String, Integer> getHeaderMap() { 590 if (this.headers.headerMap == null) { 591 return null; 592 } 593 final Map<String, Integer> map = createEmptyHeaderMap(); 594 map.putAll(this.headers.headerMap); 595 return map; 596 } 597 598 /** 599 * Gets the underlying header map. 600 * 601 * @return the underlying header map. 602 */ 603 Map<String, Integer> getHeaderMapRaw() { 604 return this.headers.headerMap; 605 } 606 607 /** 608 * Gets a read-only list of header names that iterates in column order as defined in the CSVFormat's header. 609 * <p> 610 * Note: The list provides strings that can be used as keys in the header map. 611 * The list will not contain null column names if they were present in the input 612 * format. 613 * </p> 614 * 615 * @return read-only list of header names that iterates in column order. 616 * @see #getHeaderMap() 617 * @since 1.7 618 */ 619 public List<String> getHeaderNames() { 620 return Collections.unmodifiableList(headers.headerNames); 621 } 622 623 /** 624 * Gets the current record number in the input stream. 625 * 626 * <p> 627 * <strong>ATTENTION:</strong> If your CSV input has multi-line values, the returned number does not correspond to 628 * the line number. 629 * </p> 630 * 631 * @return current record number 632 */ 633 public long getRecordNumber() { 634 return this.recordNumber; 635 } 636 637 /** 638 * Parses the CSV input according to the given format and returns the content as a list of 639 * {@link CSVRecord CSVRecords}. 640 * 641 * <p> 642 * The returned content starts at the current parse-position in the stream. 643 * </p> 644 * 645 * @return list of {@link CSVRecord CSVRecords}, may be empty 646 * @throws UncheckedIOException 647 * on parse error or input read-failure 648 */ 649 public List<CSVRecord> getRecords() { 650 return stream().collect(Collectors.toList()); 651 } 652 653 /** 654 * Gets the trailer comment, if any. 655 * Trailer comments are located between the last record and EOF 656 * 657 * @return the trailer comment for this stream, or null if no comment is available. 658 * @since 1.10.0 659 */ 660 public String getTrailerComment() { 661 return trailerComment; 662 } 663 664 /** 665 * Handle whether input is parsed as null 666 * 667 * @param input 668 * the cell data to further processed 669 * @return null if input is parsed as null, or input itself if input isn't parsed as null 670 */ 671 private String handleNull(final String input) { 672 final boolean isQuoted = this.reusableToken.isQuoted; 673 final String nullString = format.getNullString(); 674 final boolean strictQuoteMode = isStrictQuoteMode(); 675 if (input.equals(nullString)) { 676 // nullString = NULL(String), distinguish between "NULL" and NULL in ALL_NON_NULL or NON_NUMERIC quote mode 677 return strictQuoteMode && isQuoted ? input : null; 678 } 679 // don't set nullString, distinguish between "" and ,, (absent values) in All_NON_NULL or NON_NUMERIC quote mode 680 return strictQuoteMode && nullString == null && input.isEmpty() && !isQuoted ? null : input; 681 } 682 683 /** 684 * Checks whether there is a header comment. 685 * The header comment appears before the header record. 686 * Note that if the parser's format has been given an explicit header 687 * (with {@link CSVFormat.Builder#setHeader(String... )} or another overload) 688 * and the header record is not being skipped 689 * ({@link CSVFormat.Builder#setSkipHeaderRecord} is false) then any initial comments 690 * will be associated with the first record, not the header. 691 * 692 * @return true if this parser has seen a header comment, false otherwise 693 * @since 1.10.0 694 */ 695 public boolean hasHeaderComment() { 696 return headerComment != null; 697 } 698 699 /** 700 * Checks whether there is a trailer comment. 701 * Trailer comments are located between the last record and EOF. 702 * The trailer comments will only be available after the parser has 703 * finished processing this stream. 704 * 705 * @return true if this parser has seen a trailer comment, false otherwise 706 * @since 1.10.0 707 */ 708 public boolean hasTrailerComment() { 709 return trailerComment != null; 710 } 711 712 /** 713 * Tests whether this parser is closed. 714 * 715 * @return whether this parser is closed. 716 */ 717 public boolean isClosed() { 718 return this.lexer.isClosed(); 719 } 720 721 /** 722 * Tests whether the format's {@link QuoteMode} is {@link QuoteMode#ALL_NON_NULL} or {@link QuoteMode#NON_NUMERIC}. 723 * 724 * @return true if the format's {@link QuoteMode} is {@link QuoteMode#ALL_NON_NULL} or 725 * {@link QuoteMode#NON_NUMERIC}. 726 */ 727 private boolean isStrictQuoteMode() { 728 return this.format.getQuoteMode() == QuoteMode.ALL_NON_NULL || 729 this.format.getQuoteMode() == QuoteMode.NON_NUMERIC; 730 } 731 732 /** 733 * Returns the record iterator. 734 * 735 * <p> 736 * An {@link IOException} caught during the iteration are re-thrown as an 737 * {@link IllegalStateException}. 738 * </p> 739 * <p> 740 * If the parser is closed, the iterator will not yield any more records. 741 * A call to {@link Iterator#hasNext()} will return {@code false} and 742 * a call to {@link Iterator#next()} will throw a 743 * {@link NoSuchElementException}. 744 * </p> 745 * <p> 746 * If it is necessary to construct an iterator which is usable after the 747 * parser is closed, one option is to extract all records as a list with 748 * {@link #getRecords()}, and return an iterator to that list. 749 * </p> 750 */ 751 @Override 752 public Iterator<CSVRecord> iterator() { 753 return csvRecordIterator; 754 } 755 756 /** 757 * Parses the next record from the current point in the stream. 758 * 759 * @return the record as an array of values, or {@code null} if the end of the stream has been reached 760 * @throws IOException 761 * on parse error or input read-failure 762 */ 763 CSVRecord nextRecord() throws IOException { 764 CSVRecord result = null; 765 this.recordList.clear(); 766 StringBuilder sb = null; 767 final long startCharPosition = lexer.getCharacterPosition() + this.characterOffset; 768 do { 769 this.reusableToken.reset(); 770 this.lexer.nextToken(this.reusableToken); 771 switch (this.reusableToken.type) { 772 case TOKEN: 773 this.addRecordValue(false); 774 break; 775 case EORECORD: 776 this.addRecordValue(true); 777 break; 778 case EOF: 779 if (this.reusableToken.isReady) { 780 this.addRecordValue(true); 781 } else if (sb != null) { 782 trailerComment = sb.toString(); 783 } 784 break; 785 case INVALID: 786 throw new IOException("(line " + this.getCurrentLineNumber() + ") invalid parse sequence"); 787 case COMMENT: // Ignored currently 788 if (sb == null) { // first comment for this record 789 sb = new StringBuilder(); 790 } else { 791 sb.append(Constants.LF); 792 } 793 sb.append(this.reusableToken.content); 794 this.reusableToken.type = TOKEN; // Read another token 795 break; 796 default: 797 throw new IllegalStateException("Unexpected Token type: " + this.reusableToken.type); 798 } 799 } while (this.reusableToken.type == TOKEN); 800 801 if (!this.recordList.isEmpty()) { 802 this.recordNumber++; 803 final String comment = sb == null ? null : sb.toString(); 804 result = new CSVRecord(this, this.recordList.toArray(Constants.EMPTY_STRING_ARRAY), comment, 805 this.recordNumber, startCharPosition); 806 } 807 return result; 808 } 809 810 /** 811 * Returns a sequential {@code Stream} with this collection as its source. 812 * <p> 813 * If the parser is closed, the stream will not produce any more values. 814 * See the comments in {@link #iterator()}. 815 * </p> 816 * @return a sequential {@code Stream} with this collection as its source. 817 * @since 1.9.0 818 */ 819 public Stream<CSVRecord> stream() { 820 return StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator(), Spliterator.ORDERED), false); 821 } 822 823}