1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 package org.apache.commons.csv; 19 20 import static org.apache.commons.csv.Constants.BACKSLASH; 21 import static org.apache.commons.csv.Constants.COMMA; 22 import static org.apache.commons.csv.Constants.COMMENT; 23 import static org.apache.commons.csv.Constants.CR; 24 import static org.apache.commons.csv.Constants.CRLF; 25 import static org.apache.commons.csv.Constants.DOUBLE_QUOTE_CHAR; 26 import static org.apache.commons.csv.Constants.EMPTY; 27 import static org.apache.commons.csv.Constants.LF; 28 import static org.apache.commons.csv.Constants.PIPE; 29 import static org.apache.commons.csv.Constants.SP; 30 import static org.apache.commons.csv.Constants.TAB; 31 import static org.apache.commons.io.IOUtils.EOF; 32 33 import java.io.File; 34 import java.io.FileOutputStream; 35 import java.io.IOException; 36 import java.io.InputStream; 37 import java.io.OutputStream; 38 import java.io.OutputStreamWriter; 39 import java.io.Reader; 40 import java.io.Serializable; 41 import java.io.StringWriter; 42 import java.io.Writer; 43 import java.nio.charset.Charset; 44 import java.nio.file.Files; 45 import java.nio.file.Path; 46 import java.sql.ResultSet; 47 import 
java.sql.ResultSetMetaData; 48 import java.sql.SQLException; 49 import java.util.Arrays; 50 import java.util.HashSet; 51 import java.util.Objects; 52 import java.util.Set; 53 54 import org.apache.commons.codec.binary.Base64OutputStream; 55 import org.apache.commons.io.IOUtils; 56 import org.apache.commons.io.function.Uncheck; 57 import org.apache.commons.io.output.AppendableOutputStream; 58 59 /** 60 * Specifies the format of a CSV file for parsing and writing. 61 * 62 * <h2>Using predefined formats</h2> 63 * 64 * <p> 65 * You can use one of the predefined formats: 66 * </p> 67 * 68 * <ul> 69 * <li>{@link #DEFAULT}</li> 70 * <li>{@link #EXCEL}</li> 71 * <li>{@link #INFORMIX_UNLOAD}</li> 72 * <li>{@link #INFORMIX_UNLOAD_CSV}</li> 73 * <li>{@link #MONGODB_CSV}</li> 74 * <li>{@link #MONGODB_TSV}</li> 75 * <li>{@link #MYSQL}</li> 76 * <li>{@link #ORACLE}</li> 77 * <li>{@link #POSTGRESQL_CSV}</li> 78 * <li>{@link #POSTGRESQL_TEXT}</li> 79 * <li>{@link #RFC4180}</li> 80 * <li>{@link #TDF}</li> 81 * </ul> 82 * 83 * <p> 84 * For example: 85 * </p> 86 * 87 * <pre> 88 * CSVParser parser = CSVFormat.EXCEL.parse(reader); 89 * </pre> 90 * 91 * <p> 92 * The {@link CSVParser} provides static methods to parse other input types, for example: 93 * </p> 94 * 95 * <pre> 96 * CSVParser parser = CSVParser.parse(file, StandardCharsets.US_ASCII, CSVFormat.EXCEL); 97 * </pre> 98 * 99 * <h2>Defining formats</h2> 100 * 101 * <p> 102 * You can extend a format by calling the {@code set} methods. 
For example:
 * </p>
 *
 * <pre>
 * CSVFormat.EXCEL.builder().setNullString("N/A").setIgnoreSurroundingSpaces(true).build();
 * </pre>
 *
 * <h2>Defining column names</h2>
 *
 * <p>
 * To define the column names you want to use to access records, write:
 * </p>
 *
 * <pre>
 * CSVFormat.EXCEL.builder().setHeader("Col1", "Col2", "Col3").build();
 * </pre>
 *
 * <p>
 * Calling {@link Builder#setHeader(String...)} lets you use the given names to address values in a {@link CSVRecord}, and assumes that your CSV source does not
 * contain a first record that also defines column names.
 *
 * If it does, then you are overriding this metadata with your names and you should skip the first record by calling
 * {@link Builder#setSkipHeaderRecord(boolean)} with {@code true}.
 * </p>
 *
 * <h2>Parsing</h2>
 *
 * <p>
 * You can use a format directly to parse a reader. For example, to parse an Excel file with a column header, write:
 * </p>
 *
 * <pre>
 * Reader in = ...;
 * CSVFormat.EXCEL.builder().setHeader("Col1", "Col2", "Col3").build().parse(in);
 * </pre>
 *
 * <p>
 * For other input types, like resources, files, and URLs, use the static methods on {@link CSVParser}.
 * </p>
 *
 * <h2>Referencing columns safely</h2>
 *
 * <p>
 * If your source contains a header record, you can simplify your code and safely reference columns by using {@link Builder#setHeader(String...)} with no
 * arguments:
 * </p>
 *
 * <pre>
 * CSVFormat.EXCEL.builder().setHeader().build();
 * </pre>
 *
 * <p>
 * This causes the parser to read the first record and use its values as column names.
 *
 * Then, call one of the {@link CSVRecord} get methods that takes a String column name argument:
 * </p>
 *
 * <pre>
 * String value = record.get("Col1");
 * </pre>
 *
 * <p>
 * This makes your code impervious to changes in column order in the CSV file.
* </p>
 *
 * <h2>Serialization</h2>
 * <p>
 * This class implements the {@link Serializable} interface with the following caveats:
 * </p>
 * <ul>
 * <li>This class will no longer implement Serializable in 2.0.</li>
 * <li>Serialization is not supported from one version to the next.</li>
 * </ul>
 * <p>
 * The {@code serialVersionUID} values are:
 * </p>
 * <ul>
 * <li>Version 1.10.0: {@code 2L}</li>
 * <li>Version 1.9.0 through 1.0: {@code 1L}</li>
 * </ul>
 *
 * <h2>Notes</h2>
 * <p>
 * This class is immutable.
 * </p>
 * <p>
 * Not all settings are used for both parsing and writing.
 * </p>
 */
public final class CSVFormat implements Serializable {

    /**
     * Builds CSVFormat instances.
     *
     * <p>
     * Obtain a builder with {@link #create()} or {@link #create(CSVFormat)}, chain the {@code set} methods (each returns this builder) and finish with
     * {@link #build()}.
     * </p>
     *
     * @since 1.9.0
     */
    public static class Builder {

        /**
         * Creates a new builder initialized with the settings of {@link CSVFormat#DEFAULT}.
         *
         * @return a new builder.
         */
        public static Builder create() {
            return new Builder(CSVFormat.DEFAULT);
        }

        /**
         * Creates a new builder for the given format.
         *
         * @param csvFormat the source format.
213 * @return a copy of the builder 214 */ 215 public static Builder create(final CSVFormat csvFormat) { 216 return new Builder(csvFormat); 217 } 218 219 private boolean allowMissingColumnNames; 220 221 private boolean autoFlush; 222 223 private Character commentMarker; 224 225 private String delimiter; 226 227 private DuplicateHeaderMode duplicateHeaderMode; 228 229 private Character escapeCharacter; 230 231 private String[] headerComments; 232 233 private String[] headers; 234 235 private boolean ignoreEmptyLines; 236 237 private boolean ignoreHeaderCase; 238 239 private boolean ignoreSurroundingSpaces; 240 241 private String nullString; 242 243 private Character quoteCharacter; 244 245 private String quotedNullString; 246 247 private QuoteMode quoteMode; 248 249 private String recordSeparator; 250 251 private boolean skipHeaderRecord; 252 253 private boolean lenientEof; 254 255 private boolean trailingData; 256 257 private boolean trailingDelimiter; 258 259 private boolean trim; 260 261 private Builder(final CSVFormat csvFormat) { 262 this.delimiter = csvFormat.delimiter; 263 this.quoteCharacter = csvFormat.quoteCharacter; 264 this.quoteMode = csvFormat.quoteMode; 265 this.commentMarker = csvFormat.commentMarker; 266 this.escapeCharacter = csvFormat.escapeCharacter; 267 this.ignoreSurroundingSpaces = csvFormat.ignoreSurroundingSpaces; 268 this.allowMissingColumnNames = csvFormat.allowMissingColumnNames; 269 this.ignoreEmptyLines = csvFormat.ignoreEmptyLines; 270 this.recordSeparator = csvFormat.recordSeparator; 271 this.nullString = csvFormat.nullString; 272 this.headerComments = csvFormat.headerComments; 273 this.headers = csvFormat.headers; 274 this.skipHeaderRecord = csvFormat.skipHeaderRecord; 275 this.ignoreHeaderCase = csvFormat.ignoreHeaderCase; 276 this.lenientEof = csvFormat.lenientEof; 277 this.trailingData = csvFormat.trailingData; 278 this.trailingDelimiter = csvFormat.trailingDelimiter; 279 this.trim = csvFormat.trim; 280 this.autoFlush = 
csvFormat.autoFlush; 281 this.quotedNullString = csvFormat.quotedNullString; 282 this.duplicateHeaderMode = csvFormat.duplicateHeaderMode; 283 } 284 285 /** 286 * Builds a new CSVFormat instance. 287 * 288 * @return a new CSVFormat instance. 289 */ 290 public CSVFormat build() { 291 return new CSVFormat(this); 292 } 293 294 /** 295 * Sets the duplicate header names behavior, true to allow, false to disallow. 296 * 297 * @param allowDuplicateHeaderNames the duplicate header names behavior, true to allow, false to disallow. 298 * @return This instance. 299 * @deprecated Use {@link #setDuplicateHeaderMode(DuplicateHeaderMode)}. 300 */ 301 @Deprecated 302 public Builder setAllowDuplicateHeaderNames(final boolean allowDuplicateHeaderNames) { 303 setDuplicateHeaderMode(allowDuplicateHeaderNames ? DuplicateHeaderMode.ALLOW_ALL : DuplicateHeaderMode.ALLOW_EMPTY); 304 return this; 305 } 306 307 /** 308 * Sets the parser missing column names behavior, {@code true} to allow missing column names in the header line, {@code false} to cause an 309 * {@link IllegalArgumentException} to be thrown. 310 * 311 * @param allowMissingColumnNames the missing column names behavior, {@code true} to allow missing column names in the header line, {@code false} to 312 * cause an {@link IllegalArgumentException} to be thrown. 313 * @return This instance. 314 */ 315 public Builder setAllowMissingColumnNames(final boolean allowMissingColumnNames) { 316 this.allowMissingColumnNames = allowMissingColumnNames; 317 return this; 318 } 319 320 /** 321 * Sets whether to flush on close. 322 * 323 * @param autoFlush whether to flush on close. 324 * @return This instance. 325 */ 326 public Builder setAutoFlush(final boolean autoFlush) { 327 this.autoFlush = autoFlush; 328 return this; 329 } 330 331 /** 332 * Sets the comment marker character, use {@code null} to disable comments. 333 * <p> 334 * The comment start character is only recognized at the start of a line. 
335 * </p> 336 * <p> 337 * Comments are printed first, before headers. 338 * </p> 339 * <p> 340 * Use {@link #setCommentMarker(char)} or {@link #setCommentMarker(Character)} to set the comment marker written at the start of 341 * each comment line. 342 * </p> 343 * <p> 344 * If the comment marker is not set, then the header comments are ignored. 345 * </p> 346 * <p> 347 * For example: 348 * </p> 349 * <pre> 350 * builder.setCommentMarker('#') 351 * .setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0)); 352 * </pre> 353 * <p> 354 * writes: 355 * </p> 356 * <pre> 357 * # Generated by Apache Commons CSV. 358 * # 1970-01-01T00:00:00Z 359 * </pre> 360 * 361 * @param commentMarker the comment start marker, use {@code null} to disable. 362 * @return This instance. 363 * @throws IllegalArgumentException thrown if the specified character is a line break 364 */ 365 public Builder setCommentMarker(final char commentMarker) { 366 setCommentMarker(Character.valueOf(commentMarker)); 367 return this; 368 } 369 370 /** 371 * Sets the comment marker character, use {@code null} to disable comments. 372 * <p> 373 * The comment start character is only recognized at the start of a line. 374 * </p> 375 * <p> 376 * Comments are printed first, before headers. 377 * </p> 378 * <p> 379 * Use {@link #setCommentMarker(char)} or {@link #setCommentMarker(Character)} to set the comment marker written at the start of 380 * each comment line. 381 * </p> 382 * <p> 383 * If the comment marker is not set, then the header comments are ignored. 384 * </p> 385 * <p> 386 * For example: 387 * </p> 388 * <pre> 389 * builder.setCommentMarker('#') 390 * .setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0)); 391 * </pre> 392 * <p> 393 * writes: 394 * </p> 395 * <pre> 396 * # Generated by Apache Commons CSV. 397 * # 1970-01-01T00:00:00Z 398 * </pre> 399 * 400 * @param commentMarker the comment start marker, use {@code null} to disable. 
401 * @return This instance. 402 * @throws IllegalArgumentException thrown if the specified character is a line break 403 */ 404 public Builder setCommentMarker(final Character commentMarker) { 405 if (isLineBreak(commentMarker)) { 406 throw new IllegalArgumentException("The comment start marker character cannot be a line break"); 407 } 408 this.commentMarker = commentMarker; 409 return this; 410 } 411 412 /** 413 * Sets the delimiter character. 414 * 415 * @param delimiter the delimiter character. 416 * @return This instance. 417 */ 418 public Builder setDelimiter(final char delimiter) { 419 return setDelimiter(String.valueOf(delimiter)); 420 } 421 422 /** 423 * Sets the delimiter character. 424 * 425 * @param delimiter the delimiter character. 426 * @return This instance. 427 */ 428 public Builder setDelimiter(final String delimiter) { 429 if (containsLineBreak(delimiter)) { 430 throw new IllegalArgumentException("The delimiter cannot be a line break"); 431 } 432 if (delimiter.isEmpty()) { 433 throw new IllegalArgumentException("The delimiter cannot be empty"); 434 } 435 this.delimiter = delimiter; 436 return this; 437 } 438 439 /** 440 * Sets the duplicate header names behavior. 441 * 442 * @param duplicateHeaderMode the duplicate header names behavior 443 * @return This instance. 444 * @since 1.10.0 445 */ 446 public Builder setDuplicateHeaderMode(final DuplicateHeaderMode duplicateHeaderMode) { 447 this.duplicateHeaderMode = Objects.requireNonNull(duplicateHeaderMode, "duplicateHeaderMode"); 448 return this; 449 } 450 451 /** 452 * Sets the escape character. 453 * 454 * @param escapeCharacter the escape character. 455 * @return This instance. 456 * @throws IllegalArgumentException thrown if the specified character is a line break 457 */ 458 public Builder setEscape(final char escapeCharacter) { 459 setEscape(Character.valueOf(escapeCharacter)); 460 return this; 461 } 462 463 /** 464 * Sets the escape character. 
465 * 466 * @param escapeCharacter the escape character. 467 * @return This instance. 468 * @throws IllegalArgumentException thrown if the specified character is a line break 469 */ 470 public Builder setEscape(final Character escapeCharacter) { 471 if (isLineBreak(escapeCharacter)) { 472 throw new IllegalArgumentException("The escape character cannot be a line break"); 473 } 474 this.escapeCharacter = escapeCharacter; 475 return this; 476 } 477 478 /** 479 * Sets the header defined by the given {@link Enum} class. 480 * 481 * <p> 482 * Example: 483 * </p> 484 * 485 * <pre> 486 * public enum HeaderEnum { 487 * Name, Email, Phone 488 * } 489 * 490 * Builder builder = builder.setHeader(HeaderEnum.class); 491 * </pre> 492 * <p> 493 * The header is also used by the {@link CSVPrinter}. 494 * </p> 495 * 496 * @param headerEnum the enum defining the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise. 497 * @return This instance. 498 */ 499 public Builder setHeader(final Class<? extends Enum<?>> headerEnum) { 500 String[] header = null; 501 if (headerEnum != null) { 502 final Enum<?>[] enumValues = headerEnum.getEnumConstants(); 503 header = new String[enumValues.length]; 504 Arrays.setAll(header, i -> enumValues[i].name()); 505 } 506 return setHeader(header); 507 } 508 509 /** 510 * Sets the header from the result set metadata. The header can be parsed automatically from the input file with: 511 * 512 * <pre> 513 * builder.setHeader(); 514 * </pre> 515 * 516 * or specified manually with: 517 * 518 * <pre> 519 * builder.setHeader(resultSet); 520 * </pre> 521 * <p> 522 * The header is also used by the {@link CSVPrinter}. 523 * </p> 524 * 525 * @param resultSet the resultSet for the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise. 526 * @return This instance. 527 * @throws SQLException SQLException if a database access error occurs or this method is called on a closed result set. 
528 */ 529 public Builder setHeader(final ResultSet resultSet) throws SQLException { 530 return setHeader(resultSet != null ? resultSet.getMetaData() : null); 531 } 532 533 /** 534 * Sets the header from the result set metadata. The header can be parsed automatically from the input file with: 535 * 536 * <pre> 537 * builder.setHeader(); 538 * </pre> 539 * 540 * or specified manually with: 541 * 542 * <pre> 543 * builder.setHeader(resultSetMetaData); 544 * </pre> 545 * <p> 546 * The header is also used by the {@link CSVPrinter}. 547 * </p> 548 * 549 * @param resultSetMetaData the metaData for the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise. 550 * @return This instance. 551 * @throws SQLException SQLException if a database access error occurs or this method is called on a closed result set. 552 */ 553 public Builder setHeader(final ResultSetMetaData resultSetMetaData) throws SQLException { 554 String[] labels = null; 555 if (resultSetMetaData != null) { 556 final int columnCount = resultSetMetaData.getColumnCount(); 557 labels = new String[columnCount]; 558 for (int i = 0; i < columnCount; i++) { 559 labels[i] = resultSetMetaData.getColumnLabel(i + 1); 560 } 561 } 562 return setHeader(labels); 563 } 564 565 /** 566 * Sets the header to the given values. The header can be parsed automatically from the input file with: 567 * 568 * <pre> 569 * builder.setHeader(); 570 * </pre> 571 * 572 * or specified manually with: 573 * 574 * <pre> 575 * builder.setHeader("name", "email", "phone"); 576 * </pre> 577 * <p> 578 * The header is also used by the {@link CSVPrinter}. 579 * </p> 580 * 581 * @param header the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise. 582 * @return This instance. 583 */ 584 public Builder setHeader(final String... header) { 585 this.headers = CSVFormat.clone(header); 586 return this; 587 } 588 589 /** 590 * Sets the header comments to write before the CSV data. 
591 * <p> 592 * This setting is ignored by the parser. 593 * </p> 594 * <p> 595 * Comments are printed first, before headers. 596 * </p> 597 * <p> 598 * Use {@link #setCommentMarker(char)} or {@link #setCommentMarker(Character)} to set the comment marker written at the start of 599 * each comment line. 600 * </p> 601 * <p> 602 * If the comment marker is not set, then the header comments are ignored. 603 * </p> 604 * <p> 605 * For example: 606 * </p> 607 * <pre> 608 * builder.setCommentMarker('#') 609 * .setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0)); 610 * </pre> 611 * <p> 612 * writes: 613 * </p> 614 * <pre> 615 * # Generated by Apache Commons CSV. 616 * # 1970-01-01T00:00:00Z 617 * </pre> 618 * 619 * @param headerComments the headerComments which will be printed by the Printer before the CSV data. 620 * @return This instance. 621 */ 622 public Builder setHeaderComments(final Object... headerComments) { 623 this.headerComments = CSVFormat.clone(toStringArray(headerComments)); 624 return this; 625 } 626 627 /** 628 * Sets the header comments to write before the CSV data. 629 * <p> 630 * This setting is ignored by the parser. 631 * </p> 632 * <p> 633 * Comments are printed first, before headers. 634 * </p> 635 * <p> 636 * Use {@link #setCommentMarker(char)} or {@link #setCommentMarker(Character)} to set the comment marker written at the start of 637 * each comment line. 638 * </p> 639 * <p> 640 * If the comment marker is not set, then the header comments are ignored. 641 * </p> 642 * <p> 643 * For example: 644 * </p> 645 * <pre> 646 * builder.setCommentMarker('#') 647 * .setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0).toString()); 648 * </pre> 649 * <p> 650 * writes: 651 * </p> 652 * <pre> 653 * # Generated by Apache Commons CSV. 654 * # 1970-01-01T00:00:00Z 655 * </pre> 656 * 657 * @param headerComments the headerComments which will be printed by the Printer before the CSV data. 
* @return This instance.
         */
        public Builder setHeaderComments(final String... headerComments) {
            // Stores the result of CSVFormat.clone rather than the caller's array (presumably a defensive copy; the helper is not visible here).
            this.headerComments = CSVFormat.clone(headerComments);
            return this;
        }

        /**
         * Sets the empty line skipping behavior, {@code true} to ignore the empty lines between the records, {@code false} to translate empty lines to empty
         * records.
         *
         * @param ignoreEmptyLines the empty line skipping behavior, {@code true} to ignore the empty lines between the records, {@code false} to translate
         *                         empty lines to empty records.
         * @return This instance.
         */
        public Builder setIgnoreEmptyLines(final boolean ignoreEmptyLines) {
            this.ignoreEmptyLines = ignoreEmptyLines;
            return this;
        }

        /**
         * Sets the parser case mapping behavior, {@code true} to access name/values, {@code false} to leave the mapping as is.
         *
         * @param ignoreHeaderCase the case mapping behavior, {@code true} to access name/values, {@code false} to leave the mapping as is.
         * @return This instance.
         */
        public Builder setIgnoreHeaderCase(final boolean ignoreHeaderCase) {
            this.ignoreHeaderCase = ignoreHeaderCase;
            return this;
        }

        /**
         * Sets the parser trimming behavior, {@code true} to remove the surrounding spaces, {@code false} to leave the spaces as is.
         *
         * @param ignoreSurroundingSpaces the parser trimming behavior, {@code true} to remove the surrounding spaces, {@code false} to leave the spaces as is.
         * @return This instance.
         */
        public Builder setIgnoreSurroundingSpaces(final boolean ignoreSurroundingSpaces) {
            this.ignoreSurroundingSpaces = ignoreSurroundingSpaces;
            return this;
        }

        /**
         * Sets whether reading end-of-file is allowed even when input is malformed, helps Excel compatibility.
         *
         * @param lenientEof whether reading end-of-file is allowed even when input is malformed, helps Excel compatibility.
         * @return This instance.
705 * @since 1.11.0 706 */ 707 public Builder setLenientEof(final boolean lenientEof) { 708 this.lenientEof = lenientEof; 709 return this; 710 } 711 712 /** 713 * Sets the String to convert to and from {@code null}. No substitution occurs if {@code null}. 714 * 715 * <ul> 716 * <li><strong>Reading:</strong> Converts strings equal to the given {@code nullString} to {@code null} when reading records.</li> 717 * <li><strong>Writing:</strong> Writes {@code null} as the given {@code nullString} when writing records.</li> 718 * </ul> 719 * 720 * @param nullString the String to convert to and from {@code null}. No substitution occurs if {@code null}. 721 * @return This instance. 722 */ 723 public Builder setNullString(final String nullString) { 724 this.nullString = nullString; 725 this.quotedNullString = quoteCharacter + nullString + quoteCharacter; 726 return this; 727 } 728 729 /** 730 * Sets the quote character. 731 * 732 * @param quoteCharacter the quote character. 733 * @return This instance. 734 */ 735 public Builder setQuote(final char quoteCharacter) { 736 setQuote(Character.valueOf(quoteCharacter)); 737 return this; 738 } 739 740 /** 741 * Sets the quote character, use {@code null} to disable. 742 * 743 * @param quoteCharacter the quote character, use {@code null} to disable. 744 * @return This instance. 745 */ 746 public Builder setQuote(final Character quoteCharacter) { 747 if (isLineBreak(quoteCharacter)) { 748 throw new IllegalArgumentException("The quoteChar cannot be a line break"); 749 } 750 this.quoteCharacter = quoteCharacter; 751 return this; 752 } 753 754 /** 755 * Sets the quote policy to use for output. 756 * 757 * @param quoteMode the quote policy to use for output. 758 * @return This instance. 759 */ 760 public Builder setQuoteMode(final QuoteMode quoteMode) { 761 this.quoteMode = quoteMode; 762 return this; 763 } 764 765 /** 766 * Sets the record separator to use for output. 
767 * 768 * <p> 769 * <strong>Note:</strong> This setting is only used during printing and does not affect parsing. Parsing currently only works for inputs with '\n', '\r' 770 * and "\r\n" 771 * </p> 772 * 773 * @param recordSeparator the record separator to use for output. 774 * @return This instance. 775 */ 776 public Builder setRecordSeparator(final char recordSeparator) { 777 this.recordSeparator = String.valueOf(recordSeparator); 778 return this; 779 } 780 781 /** 782 * Sets the record separator to use for output. 783 * 784 * <p> 785 * <strong>Note:</strong> This setting is only used during printing and does not affect parsing. Parsing currently only works for inputs with '\n', '\r' 786 * and "\r\n" 787 * </p> 788 * 789 * @param recordSeparator the record separator to use for output. 790 * @return This instance. 791 */ 792 public Builder setRecordSeparator(final String recordSeparator) { 793 this.recordSeparator = recordSeparator; 794 return this; 795 } 796 797 /** 798 * Sets whether to skip the header record. 799 * 800 * @param skipHeaderRecord whether to skip the header record. 801 * @return This instance. 802 */ 803 public Builder setSkipHeaderRecord(final boolean skipHeaderRecord) { 804 this.skipHeaderRecord = skipHeaderRecord; 805 return this; 806 } 807 808 /** 809 * Sets whether reading trailing data is allowed in records, helps Excel compatibility. 810 * 811 * @param trailingData whether reading trailing data is allowed in records, helps Excel compatibility. 812 * @return This instance. 813 * @since 1.11.0 814 */ 815 public Builder setTrailingData(final boolean trailingData) { 816 this.trailingData = trailingData; 817 return this; 818 } 819 820 /** 821 * Sets whether to add a trailing delimiter. 822 * 823 * @param trailingDelimiter whether to add a trailing delimiter. 824 * @return This instance. 
*/
        public Builder setTrailingDelimiter(final boolean trailingDelimiter) {
            this.trailingDelimiter = trailingDelimiter;
            return this;
        }

        /**
         * Sets whether to trim leading and trailing blanks.
         *
         * @param trim whether to trim leading and trailing blanks.
         * @return This instance.
         */
        public Builder setTrim(final boolean trim) {
            this.trim = trim;
            return this;
        }
    }

    /**
     * Predefines formats. Each constant wraps one of the {@code public static final} {@link CSVFormat} constants of the enclosing class.
     *
     * @since 1.2
     */
    public enum Predefined {

        /**
         * @see CSVFormat#DEFAULT
         */
        Default(CSVFormat.DEFAULT),

        /**
         * @see CSVFormat#EXCEL
         */
        Excel(CSVFormat.EXCEL),

        /**
         * @see CSVFormat#INFORMIX_UNLOAD
         * @since 1.3
         */
        InformixUnload(CSVFormat.INFORMIX_UNLOAD),

        /**
         * @see CSVFormat#INFORMIX_UNLOAD_CSV
         * @since 1.3
         */
        InformixUnloadCsv(CSVFormat.INFORMIX_UNLOAD_CSV),

        /**
         * @see CSVFormat#MONGODB_CSV
         * @since 1.7
         */
        MongoDBCsv(CSVFormat.MONGODB_CSV),

        /**
         * @see CSVFormat#MONGODB_TSV
         * @since 1.7
         */
        MongoDBTsv(CSVFormat.MONGODB_TSV),

        /**
         * @see CSVFormat#MYSQL
         */
        MySQL(CSVFormat.MYSQL),

        /**
         * @see CSVFormat#ORACLE
         */
        Oracle(CSVFormat.ORACLE),

        /**
         * @see CSVFormat#POSTGRESQL_CSV
         * @since 1.5
         */
        PostgreSQLCsv(CSVFormat.POSTGRESQL_CSV),

        /**
         * @see CSVFormat#POSTGRESQL_TEXT
         */
        PostgreSQLText(CSVFormat.POSTGRESQL_TEXT),

        /**
         * @see CSVFormat#RFC4180
         */
        RFC4180(CSVFormat.RFC4180),

        /**
         * @see CSVFormat#TDF
         */
        TDF(CSVFormat.TDF);

        /** The format this constant stands for. */
        private final CSVFormat format;

        /**
         * Associates this constant with its format.
         *
         * @param format the predefined format.
         */
        Predefined(final CSVFormat format) {
            this.format = format;
        }

        /**
         * Gets the format.
         *
         * @return the format.
*/
        public CSVFormat getFormat() {
            return format;
        }
    }

    /**
     * Standard Comma Separated Value format, as for {@link #RFC4180} but allowing
     * empty lines.
     *
     * <p>
     * The {@link Builder} settings are:
     * </p>
     * <ul>
     * <li>{@code setDelimiter(',')}</li>
     * <li>{@code setQuote('"')}</li>
     * <li>{@code setRecordSeparator("\r\n")}</li>
     * <li>{@code setIgnoreEmptyLines(true)}</li>
     * <li>{@code setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL)}</li>
     * </ul>
     *
     * @see Predefined#Default
     */
    // NOTE(review): the arguments are positional; their meaning is fixed by a constructor that is not visible in this part of the file.
    public static final CSVFormat DEFAULT = new CSVFormat(COMMA, DOUBLE_QUOTE_CHAR, null, null, null, false, true, CRLF, null, null, null, false, false, false,
            false, false, false, DuplicateHeaderMode.ALLOW_ALL, false, false);

    /**
     * Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is locale-dependent, it might be necessary
     * to customize this format to accommodate your regional settings.
*
     * <p>
     * For example, for parsing or generating a CSV file on a French system, the following format will be used:
     * </p>
     *
     * <pre>
     * CSVFormat fmt = CSVFormat.EXCEL.builder().setDelimiter(';').build();
     * </pre>
     *
     * <p>
     * The {@link Builder} settings are:
     * </p>
     * <ul>
     * <li>{@code setDelimiter(',')}</li>
     * <li>{@code setQuote('"')}</li>
     * <li>{@code setRecordSeparator("\r\n")}</li>
     * <li>{@code setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL)}</li>
     * <li>{@code setIgnoreEmptyLines(false)}</li>
     * <li>{@code setAllowMissingColumnNames(true)}</li>
     * <li>{@code setTrailingData(true)}</li>
     * <li>{@code setLenientEof(true)}</li>
     * </ul>
     * <p>
     * Note: This is currently {@link #DEFAULT} plus {@link Builder#setIgnoreEmptyLines(boolean) Builder#setIgnoreEmptyLines(false)},
     * {@link Builder#setAllowMissingColumnNames(boolean) Builder#setAllowMissingColumnNames(true)},
     * {@link Builder#setTrailingData(boolean) Builder#setTrailingData(true)} and {@link Builder#setLenientEof(boolean) Builder#setLenientEof(true)}.
     * </p>
     *
     * @see Predefined#Excel
     */
    // @formatter:off
    public static final CSVFormat EXCEL = DEFAULT.builder()
            .setIgnoreEmptyLines(false)
            .setAllowMissingColumnNames(true)
            .setTrailingData(true)
            .setLenientEof(true)
            .build();
    // @formatter:on

    /**
     * Default Informix CSV UNLOAD format used by the {@code UNLOAD TO file_name} operation.
     *
     * <p>
     * This is a pipe-delimited format with an LF character as the line separator. Values are not quoted and special characters are escaped with {@code '\'}.
     * The default NULL string is {@code "\\N"}.
* </p>
     *
     * <p>
     * The {@link Builder} settings are:
     * </p>
     * <ul>
     * <li>{@code setDelimiter(PIPE)} (the pipe character)</li>
     * <li>{@code setEscape('\\')}</li>
     * <li>{@code setQuote('"')}</li>
     * <li>{@code setRecordSeparator('\n')}</li>
     * </ul>
     *
     * @see Predefined#InformixUnload
     * @see <a href= "http://www.ibm.com/support/knowledgecenter/SSBJG3_2.5.0/com.ibm.gen_busug.doc/c_fgl_InOutSql_UNLOAD.htm">
     *      http://www.ibm.com/support/knowledgecenter/SSBJG3_2.5.0/com.ibm.gen_busug.doc/c_fgl_InOutSql_UNLOAD.htm</a>
     * @since 1.3
     */
    // NOTE(review): the Javadoc above this constant mentions a default NULL string, but no setNullString call appears in this chain; confirm against DEFAULT.
    // @formatter:off
    public static final CSVFormat INFORMIX_UNLOAD = DEFAULT.builder()
            .setDelimiter(PIPE)
            .setEscape(BACKSLASH)
            .setQuote(DOUBLE_QUOTE_CHAR)
            .setRecordSeparator(LF)
            .build();
    // @formatter:on

    /**
     * Default Informix CSV UNLOAD format used by the {@code UNLOAD TO file_name} operation (escaping is disabled.)
     *
     * <p>
     * This is a comma-delimited format with an LF character as the line separator. This definition does not set an escape character.
     * The default NULL string is {@code "\\N"}.
* </p>
     *
     * <p>
     * The {@link Builder} settings are:
     * </p>
     * <ul>
     * <li>{@code setDelimiter(',')}</li>
     * <li>{@code setQuote('"')}</li>
     * <li>{@code setRecordSeparator('\n')}</li>
     * </ul>
     *
     * @see Predefined#InformixUnloadCsv
     * @see <a href= "http://www.ibm.com/support/knowledgecenter/SSBJG3_2.5.0/com.ibm.gen_busug.doc/c_fgl_InOutSql_UNLOAD.htm">
     *      http://www.ibm.com/support/knowledgecenter/SSBJG3_2.5.0/com.ibm.gen_busug.doc/c_fgl_InOutSql_UNLOAD.htm</a>
     * @since 1.3
     */
    // NOTE(review): the Javadoc above this constant mentions a default NULL string, but no setNullString call appears in this chain; confirm against DEFAULT.
    // @formatter:off
    public static final CSVFormat INFORMIX_UNLOAD_CSV = DEFAULT.builder()
            .setDelimiter(COMMA)
            .setQuote(DOUBLE_QUOTE_CHAR)
            .setRecordSeparator(LF)
            .build();
    // @formatter:on

    /**
     * Default MongoDB CSV format used by the {@code mongoexport} operation.
     * <p>
     * <b>Parsing is not supported yet.</b>
     * </p>
     *
     * <p>
     * This is a comma-delimited format. Values are double quoted only if needed and special characters are escaped with {@code '"'}. A header line with field
     * names is expected.
     * </p>
     * <p>
     * As of 2024-04-05, the MongoDB documentation for {@code mongoimport} states:
     * </p>
     * <blockquote>The csv parser accepts that data that complies with RFC <a href="https://tools.ietf.org/html/rfc4180">RFC-4180</a>.
     * As a result, backslashes are not a valid escape character. If you use double-quotes to enclose fields in the CSV data, you must escape
     * internal double-quote marks by prepending another double-quote.
* </blockquote>
     * <p>
     * The {@link Builder} settings are:
     * </p>
     * <ul>
     * <li>{@code setDelimiter(',')}</li>
     * <li>{@code setEscape('"')}</li>
     * <li>{@code setQuote('"')}</li>
     * <li>{@code setQuoteMode(QuoteMode.MINIMAL)}</li>
     * <li>{@code setSkipHeaderRecord(false)}</li>
     * </ul>
     *
     * @see Predefined#MongoDBCsv
     * @see <a href="https://docs.mongodb.com/manual/reference/program/mongoexport/">MongoDB mongoexport command documentation</a>
     * @since 1.7
     */
    // @formatter:off
    public static final CSVFormat MONGODB_CSV = DEFAULT.builder()
            .setDelimiter(COMMA)
            .setEscape(DOUBLE_QUOTE_CHAR)
            .setQuote(DOUBLE_QUOTE_CHAR)
            .setQuoteMode(QuoteMode.MINIMAL)
            .setSkipHeaderRecord(false)
            .build();
    // @formatter:on

    /**
     * Default MongoDB TSV format used by the {@code mongoexport} operation.
     * <p>
     * <b>Parsing is not supported yet.</b>
     * </p>
     *
     * <p>
     * This is a tab-delimited format. Values are double quoted only if needed and special
     * characters are escaped with {@code '"'}. A header line with field names is expected.
* </p>
     *
     * <p>
     * The {@link Builder} settings are:
     * </p>
     * <ul>
     * <li>{@code setDelimiter('\t')}</li>
     * <li>{@code setEscape('"')}</li>
     * <li>{@code setQuote('"')}</li>
     * <li>{@code setQuoteMode(QuoteMode.MINIMAL)}</li>
     * <li>{@code setSkipHeaderRecord(false)}</li>
     * </ul>
     *
     * @see Predefined#MongoDBCsv
     * @see <a href="https://docs.mongodb.com/manual/reference/program/mongoexport/">MongoDB mongoexport command
     *          documentation</a>
     * @since 1.7
     */
    // @formatter:off
    public static final CSVFormat MONGODB_TSV = DEFAULT.builder()
            .setDelimiter(TAB)
            .setEscape(DOUBLE_QUOTE_CHAR)
            .setQuote(DOUBLE_QUOTE_CHAR)
            .setQuoteMode(QuoteMode.MINIMAL)
            .setSkipHeaderRecord(false)
            .build();
    // @formatter:on

    /**
     * Default MySQL format used by the {@code SELECT INTO OUTFILE} and {@code LOAD DATA INFILE} operations.
     *
     * <p>
     * This is a tab-delimited format with an LF character as the line separator. Values are not quoted and special
     * characters are escaped with {@code '\'}. The default NULL string is {@code "\\N"}.
* </p>
     *
     * <p>
     * The {@link Builder} settings are:
     * </p>
     * <ul>
     * <li>{@code setDelimiter('\t')}</li>
     * <li>{@code setEscape('\\')}</li>
     * <li>{@code setIgnoreEmptyLines(false)}</li>
     * <li>{@code setQuote(null)}</li>
     * <li>{@code setRecordSeparator('\n')}</li>
     * <li>{@code setNullString("\\N")}</li>
     * <li>{@code setQuoteMode(QuoteMode.ALL_NON_NULL)}</li>
     * </ul>
     *
     * @see Predefined#MySQL
     * @see <a href="https://dev.mysql.com/doc/refman/5.1/en/load-data.html">https://dev.mysql.com/doc/refman/5.1/en/load-data.html</a>
     */
    // @formatter:off
    public static final CSVFormat MYSQL = DEFAULT.builder()
            .setDelimiter(TAB)
            .setEscape(BACKSLASH)
            .setIgnoreEmptyLines(false)
            .setQuote(null)
            .setRecordSeparator(LF)
            .setNullString(Constants.SQL_NULL_STRING)
            .setQuoteMode(QuoteMode.ALL_NON_NULL)
            .build();
    // @formatter:on

    /**
     * Default Oracle format used by the SQL*Loader utility.
     *
     * <p>
     * This is a comma-delimited format with the system line separator character as the record separator. Values are
     * double quoted when needed and special characters are escaped with {@code '\'}. The default NULL string is
     * {@code "\\N"}. Values are trimmed.
* </p>
     *
     * <p>
     * The {@link Builder} settings are:
     * </p>
     * <ul>
     * <li>{@code setDelimiter(',') // default is {@code FIELDS TERMINATED BY ','}}</li>
     * <li>{@code setEscape('\\')}</li>
     * <li>{@code setIgnoreEmptyLines(false)}</li>
     * <li>{@code setQuote('"')  // default is {@code OPTIONALLY ENCLOSED BY '"'}}</li>
     * <li>{@code setNullString("\\N")}</li>
     * <li>{@code setTrim(true)}</li>
     * <li>{@code setRecordSeparator(System.lineSeparator())}</li>
     * <li>{@code setQuoteMode(QuoteMode.MINIMAL)}</li>
     * </ul>
     *
     * @see Predefined#Oracle
     * @see <a href="https://s.apache.org/CGXG">Oracle CSV Format Specification</a>
     * @since 1.6
     */
    // @formatter:off
    public static final CSVFormat ORACLE = DEFAULT.builder()
            .setDelimiter(COMMA)
            .setEscape(BACKSLASH)
            .setIgnoreEmptyLines(false)
            .setQuote(DOUBLE_QUOTE_CHAR)
            .setNullString(Constants.SQL_NULL_STRING)
            .setTrim(true)
            .setRecordSeparator(System.lineSeparator())
            .setQuoteMode(QuoteMode.MINIMAL)
            .build();
    // @formatter:on

    /**
     * Default PostgreSQL CSV format used by the {@code COPY} operation.
     *
     * <p>
     * This is a comma-delimited format with an LF character as the line separator. Values are double quoted and special
     * characters are not escaped. The default NULL string is {@code ""}.
* </p>
     *
     * <p>
     * The {@link Builder} settings are:
     * </p>
     * <ul>
     * <li>{@code setDelimiter(',')}</li>
     * <li>{@code setEscape(null)}</li>
     * <li>{@code setIgnoreEmptyLines(false)}</li>
     * <li>{@code setQuote('"')}</li>
     * <li>{@code setRecordSeparator('\n')}</li>
     * <li>{@code setNullString("")}</li>
     * <li>{@code setQuoteMode(QuoteMode.ALL_NON_NULL)}</li>
     * </ul>
     *
     * @see Predefined#MySQL
     * @see <a href="https://www.postgresql.org/docs/current/static/sql-copy.html">PostgreSQL COPY command
     *          documentation</a>
     * @since 1.5
     */
    // @formatter:off
    public static final CSVFormat POSTGRESQL_CSV = DEFAULT.builder()
            .setDelimiter(COMMA)
            .setEscape(null)
            .setIgnoreEmptyLines(false)
            .setQuote(DOUBLE_QUOTE_CHAR)
            .setRecordSeparator(LF)
            .setNullString(EMPTY)
            .setQuoteMode(QuoteMode.ALL_NON_NULL)
            .build();
    // @formatter:on

    /**
     * Default PostgreSQL text format used by the {@code COPY} operation.
     *
     * <p>
     * This is a tab-delimited format with an LF character as the line separator. Values are not quoted and special
     * characters are escaped with {@code '\\'}. The default NULL string is {@code "\\N"}.
* </p>
     *
     * <p>
     * The {@link Builder} settings are:
     * </p>
     * <ul>
     * <li>{@code setDelimiter('\t')}</li>
     * <li>{@code setEscape('\\')}</li>
     * <li>{@code setIgnoreEmptyLines(false)}</li>
     * <li>{@code setQuote(null)}</li>
     * <li>{@code setRecordSeparator('\n')}</li>
     * <li>{@code setNullString("\\N")}</li>
     * <li>{@code setQuoteMode(QuoteMode.ALL_NON_NULL)}</li>
     * </ul>
     *
     * @see Predefined#MySQL
     * @see <a href="https://www.postgresql.org/docs/current/static/sql-copy.html">PostgreSQL COPY command
     *          documentation</a>
     * @since 1.5
     */
    // @formatter:off
    public static final CSVFormat POSTGRESQL_TEXT = DEFAULT.builder()
            .setDelimiter(TAB)
            .setEscape(BACKSLASH)
            .setIgnoreEmptyLines(false)
            .setQuote(null)
            .setRecordSeparator(LF)
            .setNullString(Constants.SQL_NULL_STRING)
            .setQuoteMode(QuoteMode.ALL_NON_NULL)
            .build();
    // @formatter:on

    /**
     * Comma separated format as defined by <a href="https://tools.ietf.org/html/rfc4180">RFC 4180</a>.
     *
     * <p>
     * The {@link Builder} settings are:
     * </p>
     * <ul>
     * <li>{@code setDelimiter(',')}</li>
     * <li>{@code setQuote('"')}</li>
     * <li>{@code setRecordSeparator("\r\n")}</li>
     * <li>{@code setIgnoreEmptyLines(false)}</li>
     * </ul>
     *
     * @see Predefined#RFC4180
     */
    public static final CSVFormat RFC4180 = DEFAULT.builder().setIgnoreEmptyLines(false).build();

    /** Serialization version identifier for this class. */
    private static final long serialVersionUID = 2L;

    /**
     * Tab-delimited format.
1307 * 1308 * <p> 1309 * The {@link Builder} settings are: 1310 * </p> 1311 * <ul> 1312 * <li>{@code setDelimiter('\t')}</li> 1313 * <li>{@code setQuote('"')}</li> 1314 * <li>{@code setRecordSeparator("\r\n")}</li> 1315 * <li>{@code setIgnoreSurroundingSpaces(true)}</li> 1316 * </ul> 1317 * 1318 * @see Predefined#TDF 1319 */ 1320 // @formatter:off 1321 public static final CSVFormat TDF = DEFAULT.builder() 1322 .setDelimiter(TAB) 1323 .setIgnoreSurroundingSpaces(true) 1324 .build(); 1325 // @formatter:on 1326 1327 /** 1328 * Null-safe clone of an array. 1329 * 1330 * @param <T> The array element type. 1331 * @param values the source array 1332 * @return the cloned array. 1333 */ 1334 @SafeVarargs 1335 static <T> T[] clone(final T... values) { 1336 return values == null ? null : values.clone(); 1337 } 1338 1339 /** 1340 * Returns true if the given string contains the search char. 1341 * 1342 * @param source the string to check. 1343 * @param searchCh the character to search. 1344 * 1345 * @return true if {@code c} contains a line break character 1346 */ 1347 private static boolean contains(final String source, final char searchCh) { 1348 return Objects.requireNonNull(source, "source").indexOf(searchCh) >= 0; 1349 } 1350 1351 /** 1352 * Returns true if the given string contains a line break character. 1353 * 1354 * @param source the string to check. 1355 * 1356 * @return true if {@code c} contains a line break character. 1357 */ 1358 private static boolean containsLineBreak(final String source) { 1359 return contains(source, CR) || contains(source, LF); 1360 } 1361 1362 static boolean isBlank(final String value) { 1363 return value == null || value.trim().isEmpty(); 1364 } 1365 1366 /** 1367 * Returns true if the given character is a line break character. 1368 * 1369 * @param c the character to check. 1370 * 1371 * @return true if {@code c} is a line break character. 
1372 */ 1373 private static boolean isLineBreak(final char c) { 1374 return c == LF || c == CR; 1375 } 1376 1377 /** 1378 * Returns true if the given character is a line break character. 1379 * 1380 * @param c the character to check, may be null. 1381 * 1382 * @return true if {@code c} is a line break character (and not null). 1383 */ 1384 private static boolean isLineBreak(final Character c) { 1385 return c != null && isLineBreak(c.charValue()); 1386 } 1387 1388 /** Same test as in as {@link String#trim()}. */ 1389 private static boolean isTrimChar(final char ch) { 1390 return ch <= SP; 1391 } 1392 1393 /** Same test as in as {@link String#trim()}. */ 1394 private static boolean isTrimChar(final CharSequence charSequence, final int pos) { 1395 return isTrimChar(charSequence.charAt(pos)); 1396 } 1397 1398 /** 1399 * Creates a new CSV format with the specified delimiter. 1400 * 1401 * <p> 1402 * Use this method if you want to create a CSVFormat from scratch. All fields but the delimiter will be initialized with null/false. 1403 * </p> 1404 * 1405 * @param delimiter the char used for value separation, must not be a line break character 1406 * @return a new CSV format. 
1407 * @throws IllegalArgumentException if the delimiter is a line break character 1408 * 1409 * @see #DEFAULT 1410 * @see #RFC4180 1411 * @see #MYSQL 1412 * @see #EXCEL 1413 * @see #TDF 1414 */ 1415 public static CSVFormat newFormat(final char delimiter) { 1416 return new CSVFormat(String.valueOf(delimiter), null, null, null, null, false, false, null, null, null, null, false, false, false, false, false, false, 1417 DuplicateHeaderMode.ALLOW_ALL, false, false); 1418 } 1419 1420 static String[] toStringArray(final Object[] values) { 1421 if (values == null) { 1422 return null; 1423 } 1424 final String[] strings = new String[values.length]; 1425 Arrays.setAll(strings, i -> Objects.toString(values[i], null)); 1426 return strings; 1427 } 1428 1429 static CharSequence trim(final CharSequence charSequence) { 1430 if (charSequence instanceof String) { 1431 return ((String) charSequence).trim(); 1432 } 1433 final int count = charSequence.length(); 1434 int len = count; 1435 int pos = 0; 1436 1437 while (pos < len && isTrimChar(charSequence, pos)) { 1438 pos++; 1439 } 1440 while (pos < len && isTrimChar(charSequence, len - 1)) { 1441 len--; 1442 } 1443 return pos > 0 || len < count ? charSequence.subSequence(pos, len) : charSequence; 1444 } 1445 1446 /** 1447 * Gets one of the predefined formats from {@link CSVFormat.Predefined}. 1448 * 1449 * @param format name 1450 * @return one of the predefined formats 1451 * @since 1.2 1452 */ 1453 public static CSVFormat valueOf(final String format) { 1454 return CSVFormat.Predefined.valueOf(format).getFormat(); 1455 } 1456 1457 private final DuplicateHeaderMode duplicateHeaderMode; 1458 1459 private final boolean allowMissingColumnNames; 1460 1461 private final boolean autoFlush; 1462 1463 /** Set to null if commenting is disabled. */ 1464 private final Character commentMarker; 1465 1466 private final String delimiter; 1467 1468 /** Set to null if escaping is disabled. 
*/
    private final Character escapeCharacter;

    /** Array of header column names. */
    private final String[] headers;

    /** Array of header comment lines. */
    private final String[] headerComments;

    /** Whether empty lines between records are ignored when parsing input. */
    private final boolean ignoreEmptyLines;

    /** Should ignore header names case. */
    private final boolean ignoreHeaderCase;

    /** Whether spaces around values are ignored when parsing input. */
    private final boolean ignoreSurroundingSpaces;

    /** The string to be used for null values. */
    private final String nullString;

    /** Set to null if quoting is disabled. */
    private final Character quoteCharacter;

    /** The {@link #nullString} surrounded by the {@link #quoteCharacter}. */
    private final String quotedNullString;

    /** The quote policy for output fields. */
    private final QuoteMode quoteMode;

    /** For output. */
    private final String recordSeparator;

    /** Whether to skip the header record. */
    private final boolean skipHeaderRecord;

    /** Whether reading end-of-file is allowed even when input is malformed. */
    private final boolean lenientEof;

    /** Whether reading trailing data is allowed in records. */
    private final boolean trailingData;

    /** Whether to add a trailing delimiter. */
    private final boolean trailingDelimiter;

    /** Whether to trim leading and trailing blanks. */
    private final boolean trim;

    /**
     * Creates a format by copying every setting from the given builder, then calls {@code validate()}.
     *
     * @param builder the source of all settings.
     */
    private CSVFormat(final Builder builder) {
        this.delimiter = builder.delimiter;
        this.quoteCharacter = builder.quoteCharacter;
        this.quoteMode = builder.quoteMode;
        this.commentMarker = builder.commentMarker;
        this.escapeCharacter = builder.escapeCharacter;
        this.ignoreSurroundingSpaces = builder.ignoreSurroundingSpaces;
        this.allowMissingColumnNames = builder.allowMissingColumnNames;
        this.ignoreEmptyLines = builder.ignoreEmptyLines;
        this.recordSeparator = builder.recordSeparator;
        this.nullString = builder.nullString;
        this.headerComments = builder.headerComments;
        this.headers = builder.headers;
        this.skipHeaderRecord = builder.skipHeaderRecord;
        this.ignoreHeaderCase = builder.ignoreHeaderCase;
        this.lenientEof = builder.lenientEof;
        this.trailingData = builder.trailingData;
        this.trailingDelimiter = builder.trailingDelimiter;
        this.trim = builder.trim;
        this.autoFlush = builder.autoFlush;
        this.quotedNullString = builder.quotedNullString;
        this.duplicateHeaderMode = builder.duplicateHeaderMode;
        validate();
    }

    /**
     * Creates a customized CSV format.
     *
     * @param delimiter               the char used for value separation, must not be a line break character.
     * @param quoteChar               the Character used as value encapsulation marker, may be {@code null} to disable.
     * @param quoteMode               the quote mode.
     * @param commentStart            the Character used for comment identification, may be {@code null} to disable.
     * @param escape                  the Character used to escape special characters in values, may be {@code null} to disable.
     * @param ignoreSurroundingSpaces {@code true} when whitespaces enclosing values should be ignored.
     * @param ignoreEmptyLines        {@code true} when the parser should skip empty lines.
     * @param recordSeparator         the line separator to use for output.
     * @param nullString              the String to convert to and from {@code null}.
     * @param headerComments          the comments to be printed by the Printer before the actual CSV data.
     * @param header                  the header.
     * @param skipHeaderRecord        if {@code true} the header row will be skipped.
     * @param allowMissingColumnNames if {@code true} the missing column names are allowed when parsing the header line.
     * @param ignoreHeaderCase        if {@code true} header names will be accessed ignoring case when parsing input.
     * @param trim                    if {@code true} next record value will be trimmed.
     * @param trailingDelimiter       if {@code true} the trailing delimiter will be added before record separator (if set).
     * @param autoFlush               if {@code true} the underlying stream will be flushed before closing.
     * @param duplicateHeaderMode     the behavior when handling duplicate headers.
     * @param trailingData            whether reading trailing data is allowed in records, helps Excel compatibility.
* @param lenientEof              whether reading end-of-file is allowed even when input is malformed, helps Excel compatibility.
     * @throws IllegalArgumentException if the delimiter is a line break character.
     */
    private CSVFormat(final String delimiter, final Character quoteChar, final QuoteMode quoteMode, final Character commentStart, final Character escape,
            final boolean ignoreSurroundingSpaces, final boolean ignoreEmptyLines, final String recordSeparator, final String nullString,
            final Object[] headerComments, final String[] header, final boolean skipHeaderRecord, final boolean allowMissingColumnNames,
            final boolean ignoreHeaderCase, final boolean trim, final boolean trailingDelimiter, final boolean autoFlush,
            final DuplicateHeaderMode duplicateHeaderMode, final boolean trailingData, final boolean lenientEof) {
        // Special characters and strings.
        this.delimiter = delimiter;
        this.quoteCharacter = quoteChar;
        this.quoteMode = quoteMode;
        this.commentMarker = commentStart;
        this.escapeCharacter = escape;
        this.recordSeparator = recordSeparator;
        this.nullString = nullString;
        // Plain string concatenation: a null quote character or null string contributes the text "null".
        this.quotedNullString = quoteCharacter + nullString + quoteCharacter;
        // Headers, defensively copied.
        this.headerComments = toStringArray(headerComments);
        this.headers = clone(header);
        this.duplicateHeaderMode = duplicateHeaderMode;
        // Flags.
        this.ignoreSurroundingSpaces = ignoreSurroundingSpaces;
        this.allowMissingColumnNames = allowMissingColumnNames;
        this.ignoreEmptyLines = ignoreEmptyLines;
        this.skipHeaderRecord = skipHeaderRecord;
        this.ignoreHeaderCase = ignoreHeaderCase;
        this.lenientEof = lenientEof;
        this.trailingData = trailingData;
        this.trailingDelimiter = trailingDelimiter;
        this.trim = trim;
        this.autoFlush = autoFlush;
        validate();
    }

    /** Appends a single character to the target, propagating any {@link IOException}. */
    private void append(final char c, final Appendable appendable) throws IOException {
        appendable.append(c);
    }

    /** Appends a character sequence to the target, propagating any {@link IOException}. */
    private void append(final CharSequence csq, final Appendable appendable) throws IOException {
        appendable.append(csq);
    }

    /**
     * Creates a new Builder for this instance.
     *
     * @return a new Builder.
     */
    public Builder builder() {
        return Builder.create(this);
    }

    /**
     * Creates a copy of this instance.
     *
     * @return a copy of this instance.
     */
    CSVFormat copy() {
        final Builder copyBuilder = builder();
        return copyBuilder.build();
    }

    /**
     * Compares every configuration field of two formats; header arrays are compared element by element.
     */
    @Override
    public boolean equals(final Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null || getClass() != obj.getClass()) {
            return false;
        }
        final CSVFormat that = (CSVFormat) obj;
        return allowMissingColumnNames == that.allowMissingColumnNames
                && autoFlush == that.autoFlush
                && Objects.equals(commentMarker, that.commentMarker)
                && Objects.equals(delimiter, that.delimiter)
                && duplicateHeaderMode == that.duplicateHeaderMode
                && Objects.equals(escapeCharacter, that.escapeCharacter)
                && Arrays.equals(headerComments, that.headerComments)
                && Arrays.equals(headers, that.headers)
                && ignoreEmptyLines == that.ignoreEmptyLines
                && ignoreHeaderCase == that.ignoreHeaderCase
                && ignoreSurroundingSpaces == that.ignoreSurroundingSpaces
                && lenientEof == that.lenientEof
                && Objects.equals(nullString, that.nullString)
                && Objects.equals(quoteCharacter, that.quoteCharacter)
                && quoteMode == that.quoteMode
                && Objects.equals(quotedNullString, that.quotedNullString)
                && Objects.equals(recordSeparator, that.recordSeparator)
                && skipHeaderRecord == that.skipHeaderRecord
                && trailingData == that.trailingData
                && trailingDelimiter == that.trailingDelimiter
                && trim == that.trim;
    }

    /** Writes the escape character followed by {@code c}; requires the escape character to be set. */
    private void escape(final char c, final Appendable appendable) throws IOException {
        append(escapeCharacter.charValue(), appendable);
        append(c, appendable);
    }

    /**
     * Formats the specified values.
     *
     * @param values the values to format
     * @return the formatted values
     */
    public String format(final Object... values) {
        return Uncheck.get(() -> format_(values));
    }

    /**
     * Prints the values as one record into a String and strips the record separator's length from the end.
     *
     * @param values the values to format.
     * @return the formatted record without its record separator.
     * @throws IOException if printing fails.
     */
    private String format_(final Object... values) throws IOException {
        final StringWriter writer = new StringWriter();
        try (CSVPrinter printer = new CSVPrinter(writer, this)) {
            printer.printRecord(values);
            final String text = writer.toString();
            final int separatorLength = recordSeparator == null ? 0 : recordSeparator.length();
            return text.substring(0, text.length() - separatorLength);
        }
    }

    /**
     * Gets whether duplicate names are allowed in the headers.
     *
     * @return whether duplicate header names are allowed
     * @since 1.7
     * @deprecated Use {@link #getDuplicateHeaderMode()}.
     */
    @Deprecated
    public boolean getAllowDuplicateHeaderNames() {
        return duplicateHeaderMode == DuplicateHeaderMode.ALLOW_ALL;
    }

    /**
     * Gets whether missing column names are allowed when parsing the header line.
     *
     * @return {@code true} if missing column names are allowed when parsing the header line, {@code false} to throw an {@link IllegalArgumentException}.
     */
    public boolean getAllowMissingColumnNames() {
        return allowMissingColumnNames;
    }

    /**
     * Gets whether to flush on close.
     *
     * @return whether to flush on close.
     * @since 1.6
     */
    public boolean getAutoFlush() {
        return autoFlush;
    }

    /**
     * Gets the comment marker character, {@code null} disables comments.
     * <p>
     * The comment start character is only recognized at the start of a line.
* </p>
     * <p>
     * Comments are printed first, before headers.
     * </p>
     * <p>
     * Use {@link Builder#setCommentMarker(char)} or {@link Builder#setCommentMarker(Character)} to set the comment
     * marker written at the start of each comment line.
     * </p>
     * <p>
     * If the comment marker is not set, then the header comments are ignored.
     * </p>
     * <p>
     * For example:
     * </p>
     * <pre>
     * builder.setCommentMarker('#')
     *        .setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0));
     * </pre>
     * <p>
     * writes:
     * </p>
     * <pre>
     * # Generated by Apache Commons CSV.
     * # 1970-01-01T00:00:00Z
     * </pre>
     *
     * @return the comment start marker, may be {@code null}
     */
    public Character getCommentMarker() {
        return commentMarker;
    }

    /**
     * Gets the first character delimiting the values (typically ';', ',' or '\t').
     *
     * @return the first delimiter character.
     * @deprecated Use {@link #getDelimiterString()}.
     */
    @Deprecated
    public char getDelimiter() {
        return delimiter.charAt(0);
    }

    /**
     * Gets the delimiter (typically ";", "," or "\t") as a new character array.
     *
     * @return the characters of the delimiter.
     */
    char[] getDelimiterCharArray() {
        return delimiter.toCharArray();
    }

    /**
     * Gets the string delimiting the values (typically ";", "," or "\t").
     *
     * @return the delimiter.
     * @since 1.9.0
     */
    public String getDelimiterString() {
        return delimiter;
    }

    /**
     * Gets how duplicate headers are handled.
     *
     * @return if duplicate header values are allowed, allowed conditionally, or disallowed.
     * @since 1.10.0
     */
    public DuplicateHeaderMode getDuplicateHeaderMode() {
        return duplicateHeaderMode;
    }

    /**
     * Gets the escape character.
*
     * @return the escape character, or {@code 0} if no escape character is set
     */
    char getEscapeChar() {
        return escapeCharacter != null ? escapeCharacter.charValue() : 0;
    }

    /**
     * Gets the escape character.
     *
     * @return the escape character, may be {@code null}
     */
    public Character getEscapeCharacter() {
        return escapeCharacter;
    }

    /**
     * Gets a copy of the header array.
     *
     * @return a copy of the header array; {@code null} if disabled, the empty array if to be read from the file
     */
    public String[] getHeader() {
        return headers != null ? headers.clone() : null;
    }

    /**
     * Gets a copy of the header comment array to write before the CSV data.
     * <p>
     * This setting is ignored by the parser.
     * </p>
     * <p>
     * Comments are printed first, before headers.
     * </p>
     * <p>
     * Use {@link Builder#setCommentMarker(char)} or {@link Builder#setCommentMarker(Character)} to set the comment
     * marker written at the start of each comment line.
     * </p>
     * <p>
     * If the comment marker is not set, then the header comments are ignored.
     * </p>
     * <p>
     * For example:
     * </p>
     * <pre>
     * builder.setCommentMarker('#')
     *        .setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0));
     * </pre>
     * <p>
     * writes:
     * </p>
     * <pre>
     * # Generated by Apache Commons CSV.
     * # 1970-01-01T00:00:00Z
     * </pre>
     *
     * @return a copy of the header comment array; {@code null} if disabled.
     */
    public String[] getHeaderComments() {
        return headerComments != null ? headerComments.clone() : null;
    }

    /**
     * Gets whether empty lines between records are ignored when parsing input.
     *
     * @return {@code true} if empty lines between records are ignored, {@code false} if they are turned into empty records.
*/
    public boolean getIgnoreEmptyLines() {
        return ignoreEmptyLines;
    }

    /**
     * Gets whether header names will be accessed ignoring case when parsing input.
     *
     * @return {@code true} if header names cases are ignored, {@code false} if they are case-sensitive.
     * @since 1.3
     */
    public boolean getIgnoreHeaderCase() {
        return ignoreHeaderCase;
    }

    /**
     * Gets whether spaces around values are ignored when parsing input.
     *
     * @return {@code true} if spaces around values are ignored, {@code false} if they are treated as part of the value.
     */
    public boolean getIgnoreSurroundingSpaces() {
        return ignoreSurroundingSpaces;
    }

    /**
     * Gets whether reading end-of-file is allowed even when input is malformed, helps Excel compatibility.
     *
     * @return whether reading end-of-file is allowed even when input is malformed, helps Excel compatibility.
     * @since 1.11.0
     */
    public boolean getLenientEof() {
        return lenientEof;
    }

    /**
     * Gets the String to convert to and from {@code null}.
     * <ul>
     * <li><strong>Reading:</strong> Converts strings equal to the given {@code nullString} to {@code null} when reading records.</li>
     * <li><strong>Writing:</strong> Writes {@code null} as the given {@code nullString} when writing records.</li>
     * </ul>
     *
     * @return the String to convert to and from {@code null}. No substitution occurs if {@code null}
     */
    public String getNullString() {
        return nullString;
    }

    /**
     * Gets the character used to encapsulate values containing special characters.
     *
     * @return the quoteChar character, may be {@code null}
     */
    public Character getQuoteCharacter() {
        return quoteCharacter;
    }

    /**
     * Gets the quote policy for output fields.
*
     * @return the quote policy
     */
    public QuoteMode getQuoteMode() {
        return quoteMode;
    }

    /**
     * Gets the record separator delimiting output records.
     *
     * @return the record separator
     */
    public String getRecordSeparator() {
        return recordSeparator;
    }

    /**
     * Gets whether to skip the header record.
     *
     * @return whether to skip the header record.
     */
    public boolean getSkipHeaderRecord() {
        return skipHeaderRecord;
    }

    /**
     * Gets whether reading trailing data is allowed in records, helps Excel compatibility.
     *
     * @return whether reading trailing data is allowed in records, helps Excel compatibility.
     * @since 1.11.0
     */
    public boolean getTrailingData() {
        return trailingData;
    }

    /**
     * Gets whether to add a trailing delimiter.
     *
     * @return whether to add a trailing delimiter.
     * @since 1.3
     */
    public boolean getTrailingDelimiter() {
        return trailingDelimiter;
    }

    /**
     * Gets whether to trim leading and trailing blanks. This is used by {@link #print(Object, Appendable, boolean)}. Also by
     * {@code CSVParser#addRecordValue(boolean)}.
     *
     * @return whether to trim leading and trailing blanks.
1952 */ 1953 public boolean getTrim() { 1954 return trim; 1955 } 1956 1957 @Override 1958 public int hashCode() { 1959 final int prime = 31; 1960 int result = 1; 1961 result = prime * result + Arrays.hashCode(headerComments); 1962 result = prime * result + Arrays.hashCode(headers); 1963 result = prime * result + Objects.hash(allowMissingColumnNames, autoFlush, commentMarker, delimiter, duplicateHeaderMode, escapeCharacter, 1964 ignoreEmptyLines, ignoreHeaderCase, ignoreSurroundingSpaces, lenientEof, nullString, quoteCharacter, quoteMode, quotedNullString, 1965 recordSeparator, skipHeaderRecord, trailingData, trailingDelimiter, trim); 1966 return result; 1967 } 1968 1969 /** 1970 * Tests whether comments are supported by this format. 1971 * 1972 * Note that the comment introducer character is only recognized at the start of a line. 1973 * 1974 * @return {@code true} is comments are supported, {@code false} otherwise 1975 */ 1976 public boolean isCommentMarkerSet() { 1977 return commentMarker != null; 1978 } 1979 1980 /** 1981 * Tests whether the next characters constitute a delimiter 1982 * 1983 * @param ch0 1984 * the first char (index 0). 1985 * @param charSeq 1986 * the match char sequence 1987 * @param startIndex 1988 * where start to match 1989 * @param delimiter 1990 * the delimiter 1991 * @param delimiterLength 1992 * the delimiter length 1993 * @return true if the match is successful 1994 */ 1995 private boolean isDelimiter(final char ch0, final CharSequence charSeq, final int startIndex, final char[] delimiter, final int delimiterLength) { 1996 if (ch0 != delimiter[0]) { 1997 return false; 1998 } 1999 final int len = charSeq.length(); 2000 if (startIndex + delimiterLength > len) { 2001 return false; 2002 } 2003 for (int i = 1; i < delimiterLength; i++) { 2004 if (charSeq.charAt(startIndex + i) != delimiter[i]) { 2005 return false; 2006 } 2007 } 2008 return true; 2009 } 2010 2011 /** 2012 * Tests whether escapes are being processed. 
2013 * 2014 * @return {@code true} if escapes are processed 2015 */ 2016 public boolean isEscapeCharacterSet() { 2017 return escapeCharacter != null; 2018 } 2019 2020 /** 2021 * Tests whether a null string has been defined. 2022 * 2023 * @return {@code true} if a nullString is defined 2024 */ 2025 public boolean isNullStringSet() { 2026 return nullString != null; 2027 } 2028 2029 /** 2030 * Tests whether a quoteChar has been defined. 2031 * 2032 * @return {@code true} if a quoteChar is defined 2033 */ 2034 public boolean isQuoteCharacterSet() { 2035 return quoteCharacter != null; 2036 } 2037 2038 /** 2039 * Parses the specified content. 2040 * 2041 * <p> 2042 * See also the various static parse methods on {@link CSVParser}. 2043 * </p> 2044 * 2045 * @param reader the input stream 2046 * @return a parser over a stream of {@link CSVRecord}s. 2047 * @throws IOException If an I/O error occurs 2048 */ 2049 public CSVParser parse(final Reader reader) throws IOException { 2050 return new CSVParser(reader, this); 2051 } 2052 2053 /** 2054 * Prints to the specified output. 2055 * 2056 * <p> 2057 * See also {@link CSVPrinter}. 2058 * </p> 2059 * 2060 * @param out the output. 2061 * @return a printer to an output. 2062 * @throws IOException thrown if the optional header cannot be printed. 2063 */ 2064 public CSVPrinter print(final Appendable out) throws IOException { 2065 return new CSVPrinter(out, this); 2066 } 2067 2068 /** 2069 * Prints to the specified {@code File} with given {@code Charset}. 2070 * 2071 * <p> 2072 * See also {@link CSVPrinter}. 2073 * </p> 2074 * 2075 * @param out the output. 2076 * @param charset A charset. 2077 * @return a printer to an output. 2078 * @throws IOException thrown if the optional header cannot be printed. 2079 * @since 1.5 2080 */ 2081 @SuppressWarnings("resource") 2082 public CSVPrinter print(final File out, final Charset charset) throws IOException { 2083 // The writer will be closed when close() is called. 
2084 return new CSVPrinter(new OutputStreamWriter(new FileOutputStream(out), charset), this); 2085 } 2086 2087 private void print(final InputStream inputStream, final Appendable out, final boolean newRecord) throws IOException { 2088 // InputStream is never null here 2089 // There is nothing to escape when quoting is used which is the default. 2090 if (!newRecord) { 2091 append(getDelimiterString(), out); 2092 } 2093 final boolean quoteCharacterSet = isQuoteCharacterSet(); 2094 if (quoteCharacterSet) { 2095 append(getQuoteCharacter().charValue(), out); 2096 } 2097 // Stream the input to the output without reading or holding the whole value in memory. 2098 // AppendableOutputStream cannot "close" an Appendable. 2099 try (OutputStream outputStream = new Base64OutputStream(new AppendableOutputStream<>(out))) { 2100 IOUtils.copy(inputStream, outputStream); 2101 } 2102 if (quoteCharacterSet) { 2103 append(getQuoteCharacter().charValue(), out); 2104 } 2105 } 2106 2107 /** 2108 * Prints the {@code value} as the next value on the line to {@code out}. The value will be escaped or encapsulated as needed. Useful when one wants to 2109 * avoid creating CSVPrinters. Trims the value if {@link #getTrim()} is true. 2110 * 2111 * @param value value to output. 2112 * @param out where to print the value. 2113 * @param newRecord if this a new record. 2114 * @throws IOException If an I/O error occurs. 2115 * @since 1.4 2116 */ 2117 public synchronized void print(final Object value, final Appendable out, final boolean newRecord) throws IOException { 2118 // null values are considered empty 2119 // Only call CharSequence.toString() if you have to, helps GC-free use cases. 
2120 CharSequence charSequence; 2121 if (value == null) { 2122 // https://issues.apache.org/jira/browse/CSV-203 2123 if (null == nullString) { 2124 charSequence = EMPTY; 2125 } else if (QuoteMode.ALL == quoteMode) { 2126 charSequence = quotedNullString; 2127 } else { 2128 charSequence = nullString; 2129 } 2130 } else if (value instanceof CharSequence) { 2131 charSequence = (CharSequence) value; 2132 } else if (value instanceof Reader) { 2133 print((Reader) value, out, newRecord); 2134 return; 2135 } else if (value instanceof InputStream) { 2136 print((InputStream) value, out, newRecord); 2137 return; 2138 } else { 2139 charSequence = value.toString(); 2140 } 2141 charSequence = getTrim() ? trim(charSequence) : charSequence; 2142 print(value, charSequence, out, newRecord); 2143 } 2144 2145 private synchronized void print(final Object object, final CharSequence value, final Appendable out, final boolean newRecord) throws IOException { 2146 final int offset = 0; 2147 final int len = value.length(); 2148 if (!newRecord) { 2149 out.append(getDelimiterString()); 2150 } 2151 if (object == null) { 2152 out.append(value); 2153 } else if (isQuoteCharacterSet()) { 2154 // The original object is needed so can check for Number 2155 printWithQuotes(object, value, out, newRecord); 2156 } else if (isEscapeCharacterSet()) { 2157 printWithEscapes(value, out); 2158 } else { 2159 out.append(value, offset, len); 2160 } 2161 } 2162 2163 /** 2164 * Prints to the specified {@code Path} with given {@code Charset}, 2165 * returns a {@code CSVPrinter} which the caller MUST close. 2166 * 2167 * <p> 2168 * See also {@link CSVPrinter}. 2169 * </p> 2170 * 2171 * @param out the output. 2172 * @param charset A charset. 2173 * @return a printer to an output. 2174 * @throws IOException thrown if the optional header cannot be printed. 
2175 * @since 1.5 2176 */ 2177 @SuppressWarnings("resource") 2178 public CSVPrinter print(final Path out, final Charset charset) throws IOException { 2179 return print(Files.newBufferedWriter(out, charset)); 2180 } 2181 2182 private void print(final Reader reader, final Appendable out, final boolean newRecord) throws IOException { 2183 // Reader is never null here 2184 if (!newRecord) { 2185 append(getDelimiterString(), out); 2186 } 2187 if (isQuoteCharacterSet()) { 2188 printWithQuotes(reader, out); 2189 } else if (isEscapeCharacterSet()) { 2190 printWithEscapes(reader, out); 2191 } else if (out instanceof Writer) { 2192 IOUtils.copyLarge(reader, (Writer) out); 2193 } else { 2194 IOUtils.copy(reader, out); 2195 } 2196 } 2197 2198 /** 2199 * Prints to the {@link System#out}. 2200 * 2201 * <p> 2202 * See also {@link CSVPrinter}. 2203 * </p> 2204 * 2205 * @return a printer to {@link System#out}. 2206 * @throws IOException thrown if the optional header cannot be printed. 2207 * @since 1.5 2208 */ 2209 public CSVPrinter printer() throws IOException { 2210 return new CSVPrinter(System.out, this); 2211 } 2212 2213 /** 2214 * Outputs the trailing delimiter (if set) followed by the record separator (if set). 2215 * 2216 * @param appendable where to write 2217 * @throws IOException If an I/O error occurs. 2218 * @since 1.4 2219 */ 2220 public synchronized void println(final Appendable appendable) throws IOException { 2221 if (getTrailingDelimiter()) { 2222 append(getDelimiterString(), appendable); 2223 } 2224 if (recordSeparator != null) { 2225 append(recordSeparator, appendable); 2226 } 2227 } 2228 2229 /** 2230 * Prints the given {@code values} to {@code out} as a single record of delimiter-separated values followed by the record separator. 2231 * 2232 * <p> 2233 * The values will be quoted if needed. Quotes and new-line characters will be escaped. 
This method adds the record separator to the output after printing 2234 * the record, so there is no need to call {@link #println(Appendable)}. 2235 * </p> 2236 * 2237 * @param appendable where to write. 2238 * @param values values to output. 2239 * @throws IOException If an I/O error occurs. 2240 * @since 1.4 2241 */ 2242 public synchronized void printRecord(final Appendable appendable, final Object... values) throws IOException { 2243 for (int i = 0; i < values.length; i++) { 2244 print(values[i], appendable, i == 0); 2245 } 2246 println(appendable); 2247 } 2248 2249 /* 2250 * Note: Must only be called if escaping is enabled, otherwise can throw exceptions. 2251 */ 2252 private void printWithEscapes(final CharSequence charSeq, final Appendable appendable) throws IOException { 2253 int start = 0; 2254 int pos = 0; 2255 final int end = charSeq.length(); 2256 final char[] delimArray = getDelimiterCharArray(); 2257 final int delimLength = delimArray.length; 2258 final char escape = getEscapeChar(); 2259 while (pos < end) { 2260 char c = charSeq.charAt(pos); 2261 final boolean isDelimiterStart = isDelimiter(c, charSeq, pos, delimArray, delimLength); 2262 final boolean isCr = c == CR; 2263 final boolean isLf = c == LF; 2264 if (isCr || isLf || c == escape || isDelimiterStart) { 2265 // write out segment up until this char 2266 if (pos > start) { 2267 appendable.append(charSeq, start, pos); 2268 } 2269 if (isLf) { 2270 c = 'n'; 2271 } else if (isCr) { 2272 c = 'r'; 2273 } 2274 escape(c, appendable); 2275 if (isDelimiterStart) { 2276 for (int i = 1; i < delimLength; i++) { 2277 pos++; 2278 escape(charSeq.charAt(pos), appendable); 2279 } 2280 } 2281 start = pos + 1; // start on the current char after this one 2282 } 2283 pos++; 2284 } 2285 2286 // write last segment 2287 if (pos > start) { 2288 appendable.append(charSeq, start, pos); 2289 } 2290 } 2291 2292 /* 2293 * Note: Must only be called if escaping is enabled, otherwise can throw exceptions. 
2294 */ 2295 private void printWithEscapes(final Reader reader, final Appendable appendable) throws IOException { 2296 int start = 0; 2297 int pos = 0; 2298 @SuppressWarnings("resource") // Temp reader on input reader. 2299 final ExtendedBufferedReader bufferedReader = new ExtendedBufferedReader(reader); 2300 final char[] delimArray = getDelimiterCharArray(); 2301 final int delimLength = delimArray.length; 2302 final char escape = getEscapeChar(); 2303 final StringBuilder builder = new StringBuilder(IOUtils.DEFAULT_BUFFER_SIZE); 2304 int c; 2305 final char[] lookAheadBuffer = new char[delimLength - 1]; 2306 while (EOF != (c = bufferedReader.read())) { 2307 builder.append((char) c); 2308 Arrays.fill(lookAheadBuffer, (char) 0); 2309 final String test = builder.toString() + new String(bufferedReader.lookAhead(lookAheadBuffer)); 2310 final boolean isDelimiterStart = isDelimiter((char) c, test, pos, delimArray, delimLength); 2311 final boolean isCr = c == CR; 2312 final boolean isLf = c == LF; 2313 if (isCr || isLf || c == escape || isDelimiterStart) { 2314 // write out segment up until this char 2315 if (pos > start) { 2316 append(builder.substring(start, pos), appendable); 2317 builder.setLength(0); 2318 pos = -1; 2319 } 2320 if (isLf) { 2321 c = 'n'; 2322 } else if (isCr) { 2323 c = 'r'; 2324 } 2325 escape((char) c, appendable); 2326 if (isDelimiterStart) { 2327 for (int i = 1; i < delimLength; i++) { 2328 escape((char) bufferedReader.read(), appendable); 2329 } 2330 } 2331 start = pos + 1; // start on the current char after this one 2332 } 2333 pos++; 2334 } 2335 // write last segment 2336 if (pos > start) { 2337 appendable.append(builder, start, pos); 2338 } 2339 } 2340 2341 /* 2342 * Note: must only be called if quoting is enabled, otherwise will generate NPE 2343 */ 2344 // the original object is needed so can check for Number 2345 private void printWithQuotes(final Object object, final CharSequence charSeq, final Appendable out, final boolean newRecord) throws 
IOException { 2346 boolean quote = false; 2347 int start = 0; 2348 int pos = 0; 2349 final int len = charSeq.length(); 2350 final char[] delim = getDelimiterCharArray(); 2351 final int delimLength = delim.length; 2352 final char quoteChar = getQuoteCharacter().charValue(); 2353 // If escape char not specified, default to the quote char 2354 // This avoids having to keep checking whether there is an escape character 2355 // at the cost of checking against quote twice 2356 final char escapeChar = isEscapeCharacterSet() ? getEscapeChar() : quoteChar; 2357 QuoteMode quoteModePolicy = getQuoteMode(); 2358 if (quoteModePolicy == null) { 2359 quoteModePolicy = QuoteMode.MINIMAL; 2360 } 2361 switch (quoteModePolicy) { 2362 case ALL: 2363 case ALL_NON_NULL: 2364 quote = true; 2365 break; 2366 case NON_NUMERIC: 2367 quote = !(object instanceof Number); 2368 break; 2369 case NONE: 2370 // Use the existing escaping code 2371 printWithEscapes(charSeq, out); 2372 return; 2373 case MINIMAL: 2374 if (len <= 0) { 2375 // Always quote an empty token that is the first 2376 // on the line, as it may be the only thing on the 2377 // line. If it were not quoted in that case, 2378 // an empty line has no tokens. 2379 if (newRecord) { 2380 quote = true; 2381 } 2382 } else { 2383 char c = charSeq.charAt(pos); 2384 if (c <= COMMENT) { 2385 // Some other chars at the start of a value caused the parser to fail, so for now 2386 // encapsulate if we start in anything less than '#'. We are being conservative 2387 // by including the default comment char too. 
2388 quote = true; 2389 } else { 2390 while (pos < len) { 2391 c = charSeq.charAt(pos); 2392 if (c == LF || c == CR || c == quoteChar || c == escapeChar || isDelimiter(c, charSeq, pos, delim, delimLength)) { 2393 quote = true; 2394 break; 2395 } 2396 pos++; 2397 } 2398 2399 if (!quote) { 2400 pos = len - 1; 2401 c = charSeq.charAt(pos); 2402 // Some other chars at the end caused the parser to fail, so for now 2403 // encapsulate if we end in anything less than ' ' 2404 if (isTrimChar(c)) { 2405 quote = true; 2406 } 2407 } 2408 } 2409 } 2410 if (!quote) { 2411 // No encapsulation needed - write out the original value 2412 out.append(charSeq, start, len); 2413 return; 2414 } 2415 break; 2416 default: 2417 throw new IllegalStateException("Unexpected Quote value: " + quoteModePolicy); 2418 } 2419 if (!quote) { 2420 // No encapsulation needed - write out the original value 2421 out.append(charSeq, start, len); 2422 return; 2423 } 2424 // We hit something that needed encapsulation 2425 out.append(quoteChar); 2426 // Pick up where we left off: pos should be positioned on the first character that caused 2427 // the need for encapsulation. 2428 while (pos < len) { 2429 final char c = charSeq.charAt(pos); 2430 if (c == quoteChar || c == escapeChar) { 2431 // write out the chunk up until this point 2432 out.append(charSeq, start, pos); 2433 out.append(escapeChar); // now output the escape 2434 start = pos; // and restart with the matched char 2435 } 2436 pos++; 2437 } 2438 // Write the last segment 2439 out.append(charSeq, start, pos); 2440 out.append(quoteChar); 2441 } 2442 2443 /** 2444 * Always use quotes unless QuoteMode is NONE, so we do not have to look ahead. 
2445 * 2446 * @param reader What to print 2447 * @param appendable Where to print it 2448 * @throws IOException If an I/O error occurs 2449 */ 2450 private void printWithQuotes(final Reader reader, final Appendable appendable) throws IOException { 2451 if (getQuoteMode() == QuoteMode.NONE) { 2452 printWithEscapes(reader, appendable); 2453 return; 2454 } 2455 final char quote = getQuoteCharacter().charValue(); 2456 // (1) Append opening quote 2457 append(quote, appendable); 2458 // (2) Append Reader contents, doubling quotes 2459 int c; 2460 while (EOF != (c = reader.read())) { 2461 append((char) c, appendable); 2462 if (c == quote) { 2463 append(quote, appendable); 2464 } 2465 } 2466 // (3) Append closing quote 2467 append(quote, appendable); 2468 } 2469 2470 @Override 2471 public String toString() { 2472 final StringBuilder sb = new StringBuilder(); 2473 sb.append("Delimiter=<").append(delimiter).append('>'); 2474 if (isEscapeCharacterSet()) { 2475 sb.append(' '); 2476 sb.append("Escape=<").append(escapeCharacter).append('>'); 2477 } 2478 if (isQuoteCharacterSet()) { 2479 sb.append(' '); 2480 sb.append("QuoteChar=<").append(quoteCharacter).append('>'); 2481 } 2482 if (quoteMode != null) { 2483 sb.append(' '); 2484 sb.append("QuoteMode=<").append(quoteMode).append('>'); 2485 } 2486 if (isCommentMarkerSet()) { 2487 sb.append(' '); 2488 sb.append("CommentStart=<").append(commentMarker).append('>'); 2489 } 2490 if (isNullStringSet()) { 2491 sb.append(' '); 2492 sb.append("NullString=<").append(nullString).append('>'); 2493 } 2494 if (recordSeparator != null) { 2495 sb.append(' '); 2496 sb.append("RecordSeparator=<").append(recordSeparator).append('>'); 2497 } 2498 if (getIgnoreEmptyLines()) { 2499 sb.append(" EmptyLines:ignored"); 2500 } 2501 if (getIgnoreSurroundingSpaces()) { 2502 sb.append(" SurroundingSpaces:ignored"); 2503 } 2504 if (getIgnoreHeaderCase()) { 2505 sb.append(" IgnoreHeaderCase:ignored"); 2506 } 2507 sb.append(" 
SkipHeaderRecord:").append(skipHeaderRecord); 2508 if (headerComments != null) { 2509 sb.append(' '); 2510 sb.append("HeaderComments:").append(Arrays.toString(headerComments)); 2511 } 2512 if (headers != null) { 2513 sb.append(' '); 2514 sb.append("Header:").append(Arrays.toString(headers)); 2515 } 2516 return sb.toString(); 2517 } 2518 2519 String trim(final String value) { 2520 return getTrim() ? value.trim() : value; 2521 } 2522 2523 /** 2524 * Verifies the validity and consistency of the attributes, and throws an {@link IllegalArgumentException} if necessary. 2525 * <p> 2526 * Because an instance can be used for both writing and parsing, not all conditions can be tested here. For example, allowMissingColumnNames is only used 2527 * for parsing, so it cannot be used here. 2528 * </p> 2529 * 2530 * @throws IllegalArgumentException Throw when any attribute is invalid or inconsistent with other attributes. 2531 */ 2532 private void validate() throws IllegalArgumentException { 2533 if (containsLineBreak(delimiter)) { 2534 throw new IllegalArgumentException("The delimiter cannot be a line break"); 2535 } 2536 if (quoteCharacter != null && contains(delimiter, quoteCharacter.charValue())) { 2537 throw new IllegalArgumentException("The quoteChar character and the delimiter cannot be the same ('" + quoteCharacter + "')"); 2538 } 2539 if (escapeCharacter != null && contains(delimiter, escapeCharacter.charValue())) { 2540 throw new IllegalArgumentException("The escape character and the delimiter cannot be the same ('" + escapeCharacter + "')"); 2541 } 2542 if (commentMarker != null && contains(delimiter, commentMarker.charValue())) { 2543 throw new IllegalArgumentException("The comment start character and the delimiter cannot be the same ('" + commentMarker + "')"); 2544 } 2545 if (quoteCharacter != null && quoteCharacter.equals(commentMarker)) { 2546 throw new IllegalArgumentException("The comment start character and the quoteChar cannot be the same ('" + commentMarker + 
"')"); 2547 } 2548 if (escapeCharacter != null && escapeCharacter.equals(commentMarker)) { 2549 throw new IllegalArgumentException("The comment start and the escape character cannot be the same ('" + commentMarker + "')"); 2550 } 2551 if (escapeCharacter == null && quoteMode == QuoteMode.NONE) { 2552 throw new IllegalArgumentException("Quote mode set to NONE but no escape character is set"); 2553 } 2554 // Validate headers 2555 if (headers != null && duplicateHeaderMode != DuplicateHeaderMode.ALLOW_ALL) { 2556 final Set<String> dupCheckSet = new HashSet<>(headers.length); 2557 final boolean emptyDuplicatesAllowed = duplicateHeaderMode == DuplicateHeaderMode.ALLOW_EMPTY; 2558 for (final String header : headers) { 2559 final boolean blank = isBlank(header); 2560 // Sanitize all empty headers to the empty string "" when checking duplicates 2561 final boolean containsHeader = !dupCheckSet.add(blank ? "" : header); 2562 if (containsHeader && !(blank && emptyDuplicatesAllowed)) { 2563 throw new IllegalArgumentException( 2564 String.format( 2565 "The header contains a duplicate name: \"%s\" in %s. If this is valid then use CSVFormat.Builder.setDuplicateHeaderMode().", 2566 header, Arrays.toString(headers))); 2567 } 2568 } 2569 } 2570 } 2571 2572 /** 2573 * Builds a new {@code CSVFormat} that allows duplicate header names. 2574 * 2575 * @return a new {@code CSVFormat} that allows duplicate header names 2576 * @since 1.7 2577 * @deprecated Use {@link Builder#setAllowDuplicateHeaderNames(boolean) Builder#setAllowDuplicateHeaderNames(true)} 2578 */ 2579 @Deprecated 2580 public CSVFormat withAllowDuplicateHeaderNames() { 2581 return builder().setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL).build(); 2582 } 2583 2584 /** 2585 * Builds a new {@code CSVFormat} with duplicate header names behavior set to the given value. 2586 * 2587 * @param allowDuplicateHeaderNames the duplicate header names behavior, true to allow, false to disallow. 
2588 * @return a new {@code CSVFormat} with duplicate header names behavior set to the given value. 2589 * @since 1.7 2590 * @deprecated Use {@link Builder#setAllowDuplicateHeaderNames(boolean)} 2591 */ 2592 @Deprecated 2593 public CSVFormat withAllowDuplicateHeaderNames(final boolean allowDuplicateHeaderNames) { 2594 final DuplicateHeaderMode mode = allowDuplicateHeaderNames ? DuplicateHeaderMode.ALLOW_ALL : DuplicateHeaderMode.ALLOW_EMPTY; 2595 return builder().setDuplicateHeaderMode(mode).build(); 2596 } 2597 2598 /** 2599 * Builds a new {@code CSVFormat} with the missing column names behavior of the format set to {@code true}. 2600 * 2601 * @return A new CSVFormat that is equal to this but with the specified missing column names behavior. 2602 * @see Builder#setAllowMissingColumnNames(boolean) 2603 * @since 1.1 2604 * @deprecated Use {@link Builder#setAllowMissingColumnNames(boolean) Builder#setAllowMissingColumnNames(true)} 2605 */ 2606 @Deprecated 2607 public CSVFormat withAllowMissingColumnNames() { 2608 return builder().setAllowMissingColumnNames(true).build(); 2609 } 2610 2611 /** 2612 * Builds a new {@code CSVFormat} with the missing column names behavior of the format set to the given value. 2613 * 2614 * @param allowMissingColumnNames the missing column names behavior, {@code true} to allow missing column names in the header line, {@code false} to cause 2615 * an {@link IllegalArgumentException} to be thrown. 2616 * @return A new CSVFormat that is equal to this but with the specified missing column names behavior. 2617 * @deprecated Use {@link Builder#setAllowMissingColumnNames(boolean)} 2618 */ 2619 @Deprecated 2620 public CSVFormat withAllowMissingColumnNames(final boolean allowMissingColumnNames) { 2621 return builder().setAllowMissingColumnNames(allowMissingColumnNames).build(); 2622 } 2623 2624 /** 2625 * Builds a new {@code CSVFormat} with whether to flush on close. 2626 * 2627 * @param autoFlush whether to flush on close. 
2628 * 2629 * @return A new CSVFormat that is equal to this but with the specified autoFlush setting. 2630 * @since 1.6 2631 * @deprecated Use {@link Builder#setAutoFlush(boolean)} 2632 */ 2633 @Deprecated 2634 public CSVFormat withAutoFlush(final boolean autoFlush) { 2635 return builder().setAutoFlush(autoFlush).build(); 2636 } 2637 2638 /** 2639 * Builds a new {@code CSVFormat} with the comment start marker of the format set to the specified character. 2640 * 2641 * Note that the comment start character is only recognized at the start of a line. 2642 * 2643 * @param commentMarker the comment start marker 2644 * @return A new CSVFormat that is equal to this one but with the specified character as the comment start marker 2645 * @throws IllegalArgumentException thrown if the specified character is a line break 2646 * @deprecated Use {@link Builder#setCommentMarker(char)} 2647 */ 2648 @Deprecated 2649 public CSVFormat withCommentMarker(final char commentMarker) { 2650 return builder().setCommentMarker(commentMarker).build(); 2651 } 2652 2653 /** 2654 * Builds a new {@code CSVFormat} with the comment start marker of the format set to the specified character. 2655 * 2656 * Note that the comment start character is only recognized at the start of a line. 2657 * 2658 * @param commentMarker the comment start marker, use {@code null} to disable 2659 * @return A new CSVFormat that is equal to this one but with the specified character as the comment start marker 2660 * @throws IllegalArgumentException thrown if the specified character is a line break 2661 * @deprecated Use {@link Builder#setCommentMarker(Character)} 2662 */ 2663 @Deprecated 2664 public CSVFormat withCommentMarker(final Character commentMarker) { 2665 return builder().setCommentMarker(commentMarker).build(); 2666 } 2667 2668 /** 2669 * Builds a new {@code CSVFormat} with the delimiter of the format set to the specified character. 
2670 * 2671 * @param delimiter the delimiter character 2672 * @return A new CSVFormat that is equal to this with the specified character as a delimiter 2673 * @throws IllegalArgumentException thrown if the specified character is a line break 2674 * @deprecated Use {@link Builder#setDelimiter(char)} 2675 */ 2676 @Deprecated 2677 public CSVFormat withDelimiter(final char delimiter) { 2678 return builder().setDelimiter(delimiter).build(); 2679 } 2680 2681 /** 2682 * Builds a new {@code CSVFormat} with the escape character of the format set to the specified character. 2683 * 2684 * @param escape the escape character 2685 * @return A new CSVFormat that is equal to this but with the specified character as the escape character 2686 * @throws IllegalArgumentException thrown if the specified character is a line break 2687 * @deprecated Use {@link Builder#setEscape(char)} 2688 */ 2689 @Deprecated 2690 public CSVFormat withEscape(final char escape) { 2691 return builder().setEscape(escape).build(); 2692 } 2693 2694 /** 2695 * Builds a new {@code CSVFormat} with the escape character of the format set to the specified character. 2696 * 2697 * @param escape the escape character, use {@code null} to disable 2698 * @return A new CSVFormat that is equal to this but with the specified character as the escape character 2699 * @throws IllegalArgumentException thrown if the specified character is a line break 2700 * @deprecated Use {@link Builder#setEscape(Character)} 2701 */ 2702 @Deprecated 2703 public CSVFormat withEscape(final Character escape) { 2704 return builder().setEscape(escape).build(); 2705 } 2706 2707 /** 2708 * Builds a new {@code CSVFormat} using the first record as header. 2709 * 2710 * <p> 2711 * Calling this method is equivalent to calling: 2712 * </p> 2713 * 2714 * <pre> 2715 * CSVFormat format = aFormat.withHeader().withSkipHeaderRecord(); 2716 * </pre> 2717 * 2718 * @return A new CSVFormat that is equal to this but using the first record as header. 
2719 * @see Builder#setSkipHeaderRecord(boolean) 2720 * @see Builder#setHeader(String...) 2721 * @since 1.3 2722 * @deprecated Use {@link Builder#setHeader(String...) Builder#setHeader()}.{@link Builder#setSkipHeaderRecord(boolean) setSkipHeaderRecord(true)}. 2723 */ 2724 @Deprecated 2725 public CSVFormat withFirstRecordAsHeader() { 2726 // @formatter:off 2727 return builder() 2728 .setHeader() 2729 .setSkipHeaderRecord(true) 2730 .build(); 2731 // @formatter:on 2732 } 2733 2734 /** 2735 * Builds a new {@code CSVFormat} with the header of the format defined by the enum class. 2736 * 2737 * <p> 2738 * Example: 2739 * </p> 2740 * 2741 * <pre> 2742 * public enum Header { 2743 * Name, Email, Phone 2744 * } 2745 * 2746 * CSVFormat format = aformat.withHeader(Header.class); 2747 * </pre> 2748 * <p> 2749 * The header is also used by the {@link CSVPrinter}. 2750 * </p> 2751 * 2752 * @param headerEnum the enum defining the header, {@code null} if disabled, empty if parsed automatically, user specified otherwise. 2753 * @return A new CSVFormat that is equal to this but with the specified header 2754 * @see Builder#setHeader(String...) 2755 * @see Builder#setSkipHeaderRecord(boolean) 2756 * @since 1.3 2757 * @deprecated Use {@link Builder#setHeader(Class)} 2758 */ 2759 @Deprecated 2760 public CSVFormat withHeader(final Class<? extends Enum<?>> headerEnum) { 2761 return builder().setHeader(headerEnum).build(); 2762 } 2763 2764 /** 2765 * Builds a new {@code CSVFormat} with the header of the format set from the result set metadata. The header can either be parsed automatically from the 2766 * input file with: 2767 * 2768 * <pre> 2769 * CSVFormat format = aformat.withHeader(); 2770 * </pre> 2771 * 2772 * or specified manually with: 2773 * 2774 * <pre> 2775 * CSVFormat format = aformat.withHeader(resultSet); 2776 * </pre> 2777 * <p> 2778 * The header is also used by the {@link CSVPrinter}. 
2779 * </p> 2780 * 2781 * @param resultSet the resultSet for the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise. 2782 * @return A new CSVFormat that is equal to this but with the specified header 2783 * @throws SQLException SQLException if a database access error occurs or this method is called on a closed result set. 2784 * @since 1.1 2785 * @deprecated Use {@link Builder#setHeader(ResultSet)} 2786 */ 2787 @Deprecated 2788 public CSVFormat withHeader(final ResultSet resultSet) throws SQLException { 2789 return builder().setHeader(resultSet).build(); 2790 } 2791 2792 /** 2793 * Builds a new {@code CSVFormat} with the header of the format set from the result set metadata. The header can either be parsed automatically from the 2794 * input file with: 2795 * 2796 * <pre> 2797 * CSVFormat format = aformat.withHeader(); 2798 * </pre> 2799 * 2800 * or specified manually with: 2801 * 2802 * <pre> 2803 * CSVFormat format = aformat.withHeader(metaData); 2804 * </pre> 2805 * <p> 2806 * The header is also used by the {@link CSVPrinter}. 2807 * </p> 2808 * 2809 * @param resultSetMetaData the metaData for the header, {@code null} if disabled, empty if parsed automatically, user specified otherwise. 2810 * @return A new CSVFormat that is equal to this but with the specified header 2811 * @throws SQLException SQLException if a database access error occurs or this method is called on a closed result set. 2812 * @since 1.1 2813 * @deprecated Use {@link Builder#setHeader(ResultSetMetaData)} 2814 */ 2815 @Deprecated 2816 public CSVFormat withHeader(final ResultSetMetaData resultSetMetaData) throws SQLException { 2817 return builder().setHeader(resultSetMetaData).build(); 2818 } 2819 2820 /** 2821 * Builds a new {@code CSVFormat} with the header of the format set to the given values. 
The header can either be parsed automatically from the input file 2822 * with: 2823 * 2824 * <pre> 2825 * CSVFormat format = aformat.withHeader(); 2826 * </pre> 2827 * 2828 * or specified manually with: 2829 * 2830 * <pre> 2831 * CSVFormat format = aformat.withHeader("name", "email", "phone"); 2832 * </pre> 2833 * <p> 2834 * The header is also used by the {@link CSVPrinter}. 2835 * </p> 2836 * 2837 * @param header the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise. 2838 * @return A new CSVFormat that is equal to this but with the specified header 2839 * @see Builder#setSkipHeaderRecord(boolean) 2840 * @deprecated Use {@link Builder#setHeader(String...)} 2841 */ 2842 @Deprecated 2843 public CSVFormat withHeader(final String... header) { 2844 return builder().setHeader(header).build(); 2845 } 2846 2847 /** 2848 * Builds a new {@code CSVFormat} with the header comments of the format set to the given values. The comments will be printed first, before the headers. 2849 * This setting is ignored by the parser. 2850 * 2851 * <pre> 2852 * CSVFormat format = aformat.withHeaderComments("Generated by Apache Commons CSV.", Instant.now()); 2853 * </pre> 2854 * 2855 * @param headerComments the headerComments which will be printed by the Printer before the actual CSV data. 2856 * @return A new CSVFormat that is equal to this but with the specified header 2857 * @see Builder#setSkipHeaderRecord(boolean) 2858 * @since 1.1 2859 * @deprecated Use {@link Builder#setHeaderComments(Object...)} 2860 */ 2861 @Deprecated 2862 public CSVFormat withHeaderComments(final Object... headerComments) { 2863 return builder().setHeaderComments(headerComments).build(); 2864 } 2865 2866 /** 2867 * Builds a new {@code CSVFormat} with the empty line skipping behavior of the format set to {@code true}. 2868 * 2869 * @return A new CSVFormat that is equal to this but with the specified empty line skipping behavior. 
2870 * @see Builder#setIgnoreEmptyLines(boolean) 2871 * @since 1.1 2872 * @deprecated Use {@link Builder#setIgnoreEmptyLines(boolean) Builder#setIgnoreEmptyLines(true)} 2873 */ 2874 @Deprecated 2875 public CSVFormat withIgnoreEmptyLines() { 2876 return builder().setIgnoreEmptyLines(true).build(); 2877 } 2878 2879 /** 2880 * Builds a new {@code CSVFormat} with the empty line skipping behavior of the format set to the given value. 2881 * 2882 * @param ignoreEmptyLines the empty line skipping behavior, {@code true} to ignore the empty lines between the records, {@code false} to translate empty 2883 * lines to empty records. 2884 * @return A new CSVFormat that is equal to this but with the specified empty line skipping behavior. 2885 * @deprecated Use {@link Builder#setIgnoreEmptyLines(boolean)} 2886 */ 2887 @Deprecated 2888 public CSVFormat withIgnoreEmptyLines(final boolean ignoreEmptyLines) { 2889 return builder().setIgnoreEmptyLines(ignoreEmptyLines).build(); 2890 } 2891 2892 /** 2893 * Builds a new {@code CSVFormat} with the header ignore case behavior set to {@code true}. 2894 * 2895 * @return A new CSVFormat that will ignore the new case header name behavior. 2896 * @see Builder#setIgnoreHeaderCase(boolean) 2897 * @since 1.3 2898 * @deprecated Use {@link Builder#setIgnoreHeaderCase(boolean) Builder#setIgnoreHeaderCase(true)} 2899 */ 2900 @Deprecated 2901 public CSVFormat withIgnoreHeaderCase() { 2902 return builder().setIgnoreHeaderCase(true).build(); 2903 } 2904 2905 /** 2906 * Builds a new {@code CSVFormat} with whether header names should be accessed ignoring case. 2907 * 2908 * @param ignoreHeaderCase the case mapping behavior, {@code true} to access name/values, {@code false} to leave the mapping as is. 
2909 * @return A new CSVFormat that will ignore case header name if specified as {@code true} 2910 * @since 1.3 2911 * @deprecated Use {@link Builder#setIgnoreHeaderCase(boolean)} 2912 */ 2913 @Deprecated 2914 public CSVFormat withIgnoreHeaderCase(final boolean ignoreHeaderCase) { 2915 return builder().setIgnoreHeaderCase(ignoreHeaderCase).build(); 2916 } 2917 2918 /** 2919 * Builds a new {@code CSVFormat} with the parser trimming behavior of the format set to {@code true}. 2920 * 2921 * @return A new CSVFormat that is equal to this but with the specified parser trimming behavior. 2922 * @see Builder#setIgnoreSurroundingSpaces(boolean) 2923 * @since 1.1 2924 * @deprecated Use {@link Builder#setIgnoreSurroundingSpaces(boolean) Builder#setIgnoreSurroundingSpaces(true)} 2925 */ 2926 @Deprecated 2927 public CSVFormat withIgnoreSurroundingSpaces() { 2928 return builder().setIgnoreSurroundingSpaces(true).build(); 2929 } 2930 2931 /** 2932 * Builds a new {@code CSVFormat} with the parser trimming behavior of the format set to the given value. 2933 * 2934 * @param ignoreSurroundingSpaces the parser trimming behavior, {@code true} to remove the surrounding spaces, {@code false} to leave the spaces as is. 2935 * @return A new CSVFormat that is equal to this but with the specified trimming behavior. 2936 * @deprecated Use {@link Builder#setIgnoreSurroundingSpaces(boolean)} 2937 */ 2938 @Deprecated 2939 public CSVFormat withIgnoreSurroundingSpaces(final boolean ignoreSurroundingSpaces) { 2940 return builder().setIgnoreSurroundingSpaces(ignoreSurroundingSpaces).build(); 2941 } 2942 2943 /** 2944 * Builds a new {@code CSVFormat} with conversions to and from null for strings on input and output. 
2945 * <ul> 2946 * <li><strong>Reading:</strong> Converts strings equal to the given {@code nullString} to {@code null} when reading records.</li> 2947 * <li><strong>Writing:</strong> Writes {@code null} as the given {@code nullString} when writing records.</li> 2948 * </ul> 2949 * 2950 * @param nullString the String to convert to and from {@code null}. No substitution occurs if {@code null} 2951 * @return A new CSVFormat that is equal to this but with the specified null conversion string. 2952 * @deprecated Use {@link Builder#setNullString(String)} 2953 */ 2954 @Deprecated 2955 public CSVFormat withNullString(final String nullString) { 2956 return builder().setNullString(nullString).build(); 2957 } 2958 2959 /** 2960 * Builds a new {@code CSVFormat} with the quoteChar of the format set to the specified character. 2961 * 2962 * @param quoteChar the quote character 2963 * @return A new CSVFormat that is equal to this but with the specified character as quoteChar 2964 * @throws IllegalArgumentException thrown if the specified character is a line break 2965 * @deprecated Use {@link Builder#setQuote(char)} 2966 */ 2967 @Deprecated 2968 public CSVFormat withQuote(final char quoteChar) { 2969 return builder().setQuote(quoteChar).build(); 2970 } 2971 2972 /** 2973 * Builds a new {@code CSVFormat} with the quoteChar of the format set to the specified character. 2974 * 2975 * @param quoteChar the quote character, use {@code null} to disable. 2976 * @return A new CSVFormat that is equal to this but with the specified character as quoteChar 2977 * @throws IllegalArgumentException thrown if the specified character is a line break 2978 * @deprecated Use {@link Builder#setQuote(Character)} 2979 */ 2980 @Deprecated 2981 public CSVFormat withQuote(final Character quoteChar) { 2982 return builder().setQuote(quoteChar).build(); 2983 } 2984 2985 /** 2986 * Builds a new {@code CSVFormat} with the output quote policy of the format set to the specified value. 
2987 * 2988 * @param quoteMode the quote policy to use for output. 2989 * 2990 * @return A new CSVFormat that is equal to this but with the specified quote policy 2991 * @deprecated Use {@link Builder#setQuoteMode(QuoteMode)} 2992 */ 2993 @Deprecated 2994 public CSVFormat withQuoteMode(final QuoteMode quoteMode) { 2995 return builder().setQuoteMode(quoteMode).build(); 2996 } 2997 2998 /** 2999 * Builds a new {@code CSVFormat} with the record separator of the format set to the specified character. 3000 * 3001 * <p> 3002 * <strong>Note:</strong> This setting is only used during printing and does not affect parsing. Parsing currently only works for inputs with '\n', '\r' and 3003 * "\r\n" 3004 * </p> 3005 * 3006 * @param recordSeparator the record separator to use for output. 3007 * @return A new CSVFormat that is equal to this but with the specified output record separator 3008 * @deprecated Use {@link Builder#setRecordSeparator(char)} 3009 */ 3010 @Deprecated 3011 public CSVFormat withRecordSeparator(final char recordSeparator) { 3012 return builder().setRecordSeparator(recordSeparator).build(); 3013 } 3014 3015 /** 3016 * Builds a new {@code CSVFormat} with the record separator of the format set to the specified String. 3017 * 3018 * <p> 3019 * <strong>Note:</strong> This setting is only used during printing and does not affect parsing. Parsing currently only works for inputs with '\n', '\r' and 3020 * "\r\n" 3021 * </p> 3022 * 3023 * @param recordSeparator the record separator to use for output. 
3024 * @return A new CSVFormat that is equal to this but with the specified output record separator 3025 * @throws IllegalArgumentException if recordSeparator is none of CR, LF or CRLF 3026 * @deprecated Use {@link Builder#setRecordSeparator(String)} 3027 */ 3028 @Deprecated 3029 public CSVFormat withRecordSeparator(final String recordSeparator) { 3030 return builder().setRecordSeparator(recordSeparator).build(); 3031 } 3032 3033 /** 3034 * Builds a new {@code CSVFormat} with skipping the header record set to {@code true}. 3035 * 3036 * @return A new CSVFormat that is equal to this but with the specified skipHeaderRecord setting. 3037 * @see Builder#setSkipHeaderRecord(boolean) 3038 * @see Builder#setHeader(String...) 3039 * @since 1.1 3040 * @deprecated Use {@link Builder#setSkipHeaderRecord(boolean) Builder#setSkipHeaderRecord(true)} 3041 */ 3042 @Deprecated 3043 public CSVFormat withSkipHeaderRecord() { 3044 return builder().setSkipHeaderRecord(true).build(); 3045 } 3046 3047 /** 3048 * Builds a new {@code CSVFormat} with whether to skip the header record. 3049 * 3050 * @param skipHeaderRecord whether to skip the header record. 3051 * @return A new CSVFormat that is equal to this but with the specified skipHeaderRecord setting. 3052 * @see Builder#setHeader(String...) 3053 * @deprecated Use {@link Builder#setSkipHeaderRecord(boolean)} 3054 */ 3055 @Deprecated 3056 public CSVFormat withSkipHeaderRecord(final boolean skipHeaderRecord) { 3057 return builder().setSkipHeaderRecord(skipHeaderRecord).build(); 3058 } 3059 3060 /** 3061 * Builds a new {@code CSVFormat} with the record separator of the format set to the operating system's line separator string, typically CR+LF on Windows 3062 * and LF on Linux. 3063 * 3064 * <p> 3065 * <strong>Note:</strong> This setting is only used during printing and does not affect parsing. 
Parsing currently only works for inputs with '\n', '\r' and 3066 * "\r\n" 3067 * </p> 3068 * 3069 * @return A new CSVFormat that is equal to this but with the operating system's line separator string. 3070 * @since 1.6 3071 * @deprecated Use {@link Builder#setRecordSeparator(String) setRecordSeparator(System.lineSeparator())} 3072 */ 3073 @Deprecated 3074 public CSVFormat withSystemRecordSeparator() { 3075 return builder().setRecordSeparator(System.lineSeparator()).build(); 3076 } 3077 3078 /** 3079 * Builds a new {@code CSVFormat} to add a trailing delimiter. 3080 * 3081 * @return A new CSVFormat that is equal to this but with the trailing delimiter setting. 3082 * @since 1.3 3083 * @deprecated Use {@link Builder#setTrailingDelimiter(boolean) Builder#setTrailingDelimiter(true)} 3084 */ 3085 @Deprecated 3086 public CSVFormat withTrailingDelimiter() { 3087 return builder().setTrailingDelimiter(true).build(); 3088 } 3089 3090 /** 3091 * Builds a new {@code CSVFormat} with whether to add a trailing delimiter. 3092 * 3093 * @param trailingDelimiter whether to add a trailing delimiter. 3094 * @return A new CSVFormat that is equal to this but with the specified trailing delimiter setting. 3095 * @since 1.3 3096 * @deprecated Use {@link Builder#setTrailingDelimiter(boolean)} 3097 */ 3098 @Deprecated 3099 public CSVFormat withTrailingDelimiter(final boolean trailingDelimiter) { 3100 return builder().setTrailingDelimiter(trailingDelimiter).build(); 3101 } 3102 3103 /** 3104 * Builds a new {@code CSVFormat} to trim leading and trailing blanks. See {@link #getTrim()} for details of where this is used. 3105 * 3106 * @return A new CSVFormat that is equal to this but with the trim setting on. 
* @since 1.3
     * @deprecated Use {@link Builder#setTrim(boolean) Builder#setTrim(true)}
     */
    @Deprecated
    public CSVFormat withTrim() {
        return builder()
                .setTrim(true)
                .build();
    }

    /**
     * Creates a copy of this {@code CSVFormat} with the given choice of whether to trim leading and trailing blanks. See {@link #getTrim()} for details of
     * where this is used.
     *
     * @param trim whether to trim leading and trailing blanks.
     * @return A new CSVFormat that is equal to this but with the specified trim setting.
     * @since 1.3
     * @deprecated Use {@link Builder#setTrim(boolean)}
     */
    @Deprecated
    public CSVFormat withTrim(final boolean trim) {
        return builder()
                .setTrim(trim)
                .build();
    }
}