View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.csv;
19  
20  import static org.apache.commons.csv.Constants.BACKSLASH;
21  import static org.apache.commons.csv.Constants.COMMA;
22  import static org.apache.commons.csv.Constants.COMMENT;
23  import static org.apache.commons.csv.Constants.EMPTY;
24  import static org.apache.commons.csv.Constants.CR;
25  import static org.apache.commons.csv.Constants.CRLF;
26  import static org.apache.commons.csv.Constants.DOUBLE_QUOTE_CHAR;
27  import static org.apache.commons.csv.Constants.LF;
28  import static org.apache.commons.csv.Constants.PIPE;
29  import static org.apache.commons.csv.Constants.SP;
30  import static org.apache.commons.csv.Constants.TAB;
31  
32  import java.io.File;
33  import java.io.FileOutputStream;
34  import java.io.IOException;
35  import java.io.OutputStreamWriter;
36  import java.io.Reader;
37  import java.io.Serializable;
38  import java.io.StringWriter;
39  import java.nio.charset.Charset;
40  import java.nio.file.Files;
41  import java.nio.file.Path;
42  import java.sql.ResultSet;
43  import java.sql.ResultSetMetaData;
44  import java.sql.SQLException;
45  import java.util.Arrays;
46  import java.util.HashSet;
47  import java.util.Set;
48  
49  /**
50   * Specifies the format of a CSV file and parses input.
51   *
52   * <h2>Using predefined formats</h2>
53   *
54   * <p>
55   * You can use one of the predefined formats:
56   * </p>
57   *
58   * <ul>
59   * <li>{@link #DEFAULT}</li>
60   * <li>{@link #EXCEL}</li>
61   * <li>{@link #MYSQL}</li>
62   * <li>{@link #RFC4180}</li>
63   * <li>{@link #TDF}</li>
64   * </ul>
65   *
66   * <p>
67   * For example:
68   * </p>
69   *
70   * <pre>
71   * CSVParser parser = CSVFormat.EXCEL.parse(reader);
72   * </pre>
73   *
74   * <p>
75   * The {@link CSVParser} provides static methods to parse other input types, for example:
76   * </p>
77   *
78   * <pre>
79   * CSVParser parser = CSVParser.parse(file, StandardCharsets.US_ASCII, CSVFormat.EXCEL);
80   * </pre>
81   *
82   * <h2>Defining formats</h2>
83   *
84   * <p>
85   * You can extend a format by calling the {@code with} methods. For example:
86   * </p>
87   *
88   * <pre>
89   * CSVFormat.EXCEL.withNullString(&quot;N/A&quot;).withIgnoreSurroundingSpaces(true);
90   * </pre>
91   *
92   * <h2>Defining column names</h2>
93   *
94   * <p>
95   * To define the column names you want to use to access records, write:
96   * </p>
97   *
98   * <pre>
99   * CSVFormat.EXCEL.withHeader(&quot;Col1&quot;, &quot;Col2&quot;, &quot;Col3&quot;);
100  * </pre>
101  *
102  * <p>
103  * Calling {@link #withHeader(String...)} lets you use the given names to address values in a {@link CSVRecord}, and
104  * assumes that your CSV source does not contain a first record that also defines column names.
105  *
106  * If it does, then you are overriding this metadata with your names and you should skip the first record by calling
107  * {@link #withSkipHeaderRecord(boolean)} with {@code true}.
108  * </p>
109  *
110  * <h2>Parsing</h2>
111  *
112  * <p>
113  * You can use a format directly to parse a reader. For example, to parse an Excel file with columns header, write:
114  * </p>
115  *
116  * <pre>
117  * Reader in = ...;
118  * CSVFormat.EXCEL.withHeader(&quot;Col1&quot;, &quot;Col2&quot;, &quot;Col3&quot;).parse(in);
119  * </pre>
120  *
121  * <p>
122  * For other input types, like resources, files, and URLs, use the static methods on {@link CSVParser}.
123  * </p>
124  *
125  * <h2>Referencing columns safely</h2>
126  *
127  * <p>
128  * If your source contains a header record, you can simplify your code and safely reference columns, by using
129  * {@link #withHeader(String...)} with no arguments:
130  * </p>
131  *
132  * <pre>
133  * CSVFormat.EXCEL.withHeader();
134  * </pre>
135  *
136  * <p>
137  * This causes the parser to read the first record and use its values as column names.
138  *
139  * Then, call one of the {@link CSVRecord} get methods that take a String column name argument:
140  * </p>
141  *
142  * <pre>
143  * String value = record.get(&quot;Col1&quot;);
144  * </pre>
145  *
146  * <p>
147  * This makes your code impervious to changes in column order in the CSV file.
148  * </p>
149  *
150  * <h2>Notes</h2>
151  *
152  * <p>
153  * This class is immutable.
154  * </p>
155  */
156 public final class CSVFormat implements Serializable {
157 
158     /**
159      * Predefines formats.
160      *
161      * @since 1.2
162      */
163     public enum Predefined {
164 
165         /**
166          * @see CSVFormat#DEFAULT
167          */
168         Default(CSVFormat.DEFAULT),
169 
170         /**
171          * @see CSVFormat#EXCEL
172          */
173         Excel(CSVFormat.EXCEL),
174 
175         /**
176          * @see CSVFormat#INFORMIX_UNLOAD
177          * @since 1.3
178          */
179         InformixUnload(CSVFormat.INFORMIX_UNLOAD),
180 
181         /**
182          * @see CSVFormat#INFORMIX_UNLOAD_CSV
183          * @since 1.3
184          */
185         InformixUnloadCsv(CSVFormat.INFORMIX_UNLOAD_CSV),
186 
187         /**
188          * @see CSVFormat#MYSQL
189          */
190         MySQL(CSVFormat.MYSQL),
191 
192         /**
193          * @see CSVFormat#POSTGRESQL_CSV
194          * @since 1.5
195          */
196         PostgreSQLCsv(CSVFormat.POSTGRESQL_CSV),
197 
198         /**
199          * @see CSVFormat#POSTGRESQL_CSV
200          */
201         PostgreSQLText(CSVFormat.POSTGRESQL_TEXT),
202 
203         /**
204          * @see CSVFormat#RFC4180
205          */
206         RFC4180(CSVFormat.RFC4180),
207 
208         /**
209          * @see CSVFormat#TDF
210          */
211         TDF(CSVFormat.TDF);
212 
213         private final CSVFormat format;
214 
215         Predefined(final CSVFormat format) {
216             this.format = format;
217         }
218 
219         /**
220          * Gets the format.
221          *
222          * @return the format.
223          */
224         public CSVFormat getFormat() {
225             return format;
226         }
227     }
228 
229     /**
230      * Standard comma separated format, as for {@link #RFC4180} but allowing empty lines.
231      *
232      * <p>
233      * Settings are:
234      * </p>
235      * <ul>
236      * <li>withDelimiter(',')</li>
237      * <li>withQuote('"')</li>
238      * <li>withRecordSeparator("\r\n")</li>
239      * <li>withIgnoreEmptyLines(true)</li>
240      * </ul>
241      *
242      * @see Predefined#Default
243      */
244     public static final CSVFormat DEFAULT = new CSVFormat(COMMA, DOUBLE_QUOTE_CHAR, null, null, null, false, true, CRLF,
245             null, null, null, false, false, false, false, false);
246 
247     /**
248      * Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is
249      * locale dependent, it might be necessary to customize this format to accommodate to your regional settings.
250      *
251      * <p>
252      * For example for parsing or generating a CSV file on a French system the following format will be used:
253      * </p>
254      *
255      * <pre>
256      * CSVFormat fmt = CSVFormat.EXCEL.withDelimiter(';');
257      * </pre>
258      *
259      * <p>
260      * Settings are:
261      * </p>
262      * <ul>
263      * <li>{@link #withDelimiter(char) withDelimiter(',')}</li>
264      * <li>{@link #withQuote(char) withQuote('"')}</li>
265      * <li>{@link #withRecordSeparator(String) withRecordSeparator("\r\n")}</li>
266      * <li>{@link #withIgnoreEmptyLines(boolean) withIgnoreEmptyLines(false)}</li>
267      * <li>{@link #withAllowMissingColumnNames(boolean) withAllowMissingColumnNames(true)}</li>
268      * </ul>
269      * <p>
270      * Note: this is currently like {@link #RFC4180} plus {@link #withAllowMissingColumnNames(boolean)
271      * withAllowMissingColumnNames(true)}.
272      * </p>
273      *
274      * @see Predefined#Excel
275      */
276     // @formatter:off
277     public static final CSVFormat EXCEL = DEFAULT
278             .withIgnoreEmptyLines(false)
279             .withAllowMissingColumnNames();
280     // @formatter:on
281 
282     /**
283      * Default Informix CSV UNLOAD format used by the {@code UNLOAD TO file_name} operation.
284      *
285      * <p>
286      * This is a comma-delimited format with a LF character as the line separator. Values are not quoted and special
287      * characters are escaped with {@code '\'}. The default NULL string is {@code "\\N"}.
288      * </p>
289      *
290      * <p>
291      * Settings are:
292      * </p>
293      * <ul>
294      * <li>withDelimiter(',')</li>
295      * <li>withQuote("\"")</li>
296      * <li>withRecordSeparator('\n')</li>
297      * <li>withEscape('\\')</li>
298      * </ul>
299      *
300      * @see Predefined#MySQL
301      * @see <a href=
302      *      "http://www.ibm.com/support/knowledgecenter/SSBJG3_2.5.0/com.ibm.gen_busug.doc/c_fgl_InOutSql_UNLOAD.htm">
303      *      http://www.ibm.com/support/knowledgecenter/SSBJG3_2.5.0/com.ibm.gen_busug.doc/c_fgl_InOutSql_UNLOAD.htm</a>;
304      * @since 1.3
305      */
306     // @formatter:off
307     public static final CSVFormat INFORMIX_UNLOAD = DEFAULT
308             .withDelimiter(PIPE)
309             .withEscape(BACKSLASH)
310             .withQuote(DOUBLE_QUOTE_CHAR)
311             .withRecordSeparator(LF);
312     // @formatter:on
313 
314     /**
315      * Default Informix CSV UNLOAD format used by the {@code UNLOAD TO file_name} operation (escaping is disabled.)
316      *
317      * <p>
318      * This is a comma-delimited format with a LF character as the line separator. Values are not quoted and special
319      * characters are escaped with {@code '\'}. The default NULL string is {@code "\\N"}.
320      * </p>
321      *
322      * <p>
323      * Settings are:
324      * </p>
325      * <ul>
326      * <li>withDelimiter(',')</li>
327      * <li>withQuote("\"")</li>
328      * <li>withRecordSeparator('\n')</li>
329      * </ul>
330      *
331      * @see Predefined#MySQL
332      * @see <a href=
333      *      "http://www.ibm.com/support/knowledgecenter/SSBJG3_2.5.0/com.ibm.gen_busug.doc/c_fgl_InOutSql_UNLOAD.htm">
334      *      http://www.ibm.com/support/knowledgecenter/SSBJG3_2.5.0/com.ibm.gen_busug.doc/c_fgl_InOutSql_UNLOAD.htm</a>;
335      * @since 1.3
336      */
337     // @formatter:off
338     public static final CSVFormat INFORMIX_UNLOAD_CSV = DEFAULT
339             .withDelimiter(COMMA)
340             .withQuote(DOUBLE_QUOTE_CHAR)
341             .withRecordSeparator(LF);
342     // @formatter:on
343 
344     /**
345      * Default MySQL format used by the {@code SELECT INTO OUTFILE} and {@code LOAD DATA INFILE} operations.
346      *
347      * <p>
348      * This is a tab-delimited format with a LF character as the line separator. Values are not quoted and special
349      * characters are escaped with {@code '\'}. The default NULL string is {@code "\\N"}.
350      * </p>
351      *
352      * <p>
353      * Settings are:
354      * </p>
355      * <ul>
356      * <li>withDelimiter('\t')</li>
357      * <li>withQuote(null)</li>
358      * <li>withRecordSeparator('\n')</li>
359      * <li>withIgnoreEmptyLines(false)</li>
360      * <li>withEscape('\\')</li>
361      * <li>withNullString("\\N")</li>
362      * <li>withQuoteMode(QuoteMode.ALL_NON_NULL)</li>
363      * </ul>
364      *
365      * @see Predefined#MySQL
366      * @see <a href="http://dev.mysql.com/doc/refman/5.1/en/load-data.html"> http://dev.mysql.com/doc/refman/5.1/en/load
367      *      -data.html</a>
368      */
369     // @formatter:off
370     public static final CSVFormat MYSQL = DEFAULT
371             .withDelimiter(TAB)
372             .withEscape(BACKSLASH)
373             .withIgnoreEmptyLines(false)
374             .withQuote(null)
375             .withRecordSeparator(LF)
376             .withNullString("\\N")
377             .withQuoteMode(QuoteMode.ALL_NON_NULL);
378     // @formatter:off
379 
380     /**
381      * Default PostgreSQL CSV format used by the {@code COPY} operation.
382      *
383      * <p>
384      * This is a comma-delimited format with a LF character as the line separator. Values are double quoted and special
385      * characters are escaped with {@code '"'}. The default NULL string is {@code ""}.
386      * </p>
387      *
388      * <p>
389      * Settings are:
390      * </p>
391      * <ul>
392      * <li>withDelimiter(',')</li>
393      * <li>withQuote('"')</li>
394      * <li>withRecordSeparator('\n')</li>
395      * <li>withIgnoreEmptyLines(false)</li>
396      * <li>withEscape('\\')</li>
397      * <li>withNullString("")</li>
398      * <li>withQuoteMode(QuoteMode.ALL_NON_NULL)</li>
399      * </ul>
400      *
401      * @see Predefined#MySQL
402      * @see <a href="http://dev.mysql.com/doc/refman/5.1/en/load-data.html"> http://dev.mysql.com/doc/refman/5.1/en/load
403      *      -data.html</a>
404      * @since 1.5
405      */
406     // @formatter:off
407     public static final CSVFormat POSTGRESQL_CSV = DEFAULT
408             .withDelimiter(COMMA)
409             .withEscape(DOUBLE_QUOTE_CHAR)
410             .withIgnoreEmptyLines(false)
411             .withQuote(DOUBLE_QUOTE_CHAR)
412             .withRecordSeparator(LF)
413             .withNullString(EMPTY)
414             .withQuoteMode(QuoteMode.ALL_NON_NULL);
415     // @formatter:off
416 
417     /**
418      * Default PostgreSQL text format used by the {@code COPY} operation.
419      *
420      * <p>
421      * This is a tab-delimited format with a LF character as the line separator. Values are double quoted and special
422      * characters are escaped with {@code '"'}. The default NULL string is {@code "\\N"}.
423      * </p>
424      *
425      * <p>
426      * Settings are:
427      * </p>
428      * <ul>
429      * <li>withDelimiter('\t')</li>
430      * <li>withQuote('"')</li>
431      * <li>withRecordSeparator('\n')</li>
432      * <li>withIgnoreEmptyLines(false)</li>
433      * <li>withEscape('\\')</li>
434      * <li>withNullString("\\N")</li>
435      * <li>withQuoteMode(QuoteMode.ALL_NON_NULL)</li>
436      * </ul>
437      *
438      * @see Predefined#MySQL
439      * @see <a href="http://dev.mysql.com/doc/refman/5.1/en/load-data.html"> http://dev.mysql.com/doc/refman/5.1/en/load
440      *      -data.html</a>
441      * @since 1.5
442      */
443     // @formatter:off
444     public static final CSVFormat POSTGRESQL_TEXT = DEFAULT
445             .withDelimiter(TAB)
446             .withEscape(DOUBLE_QUOTE_CHAR)
447             .withIgnoreEmptyLines(false)
448             .withQuote(DOUBLE_QUOTE_CHAR)
449             .withRecordSeparator(LF)
450             .withNullString("\\N")
451             .withQuoteMode(QuoteMode.ALL_NON_NULL);
452     // @formatter:off
453 
454     /**
455      * Comma separated format as defined by <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>.
456      *
457      * <p>
458      * Settings are:
459      * </p>
460      * <ul>
461      * <li>withDelimiter(',')</li>
462      * <li>withQuote('"')</li>
463      * <li>withRecordSeparator("\r\n")</li>
464      * <li>withIgnoreEmptyLines(false)</li>
465      * </ul>
466      *
467      * @see Predefined#RFC4180
468      */
469     public static final CSVFormat RFC4180 = DEFAULT.withIgnoreEmptyLines(false);
470 
471     private static final long serialVersionUID = 1L;
472 
473     /**
474      * Tab-delimited format.
475      *
476      * <p>
477      * Settings are:
478      * </p>
479      * <ul>
480      * <li>withDelimiter('\t')</li>
481      * <li>withQuote('"')</li>
482      * <li>withRecordSeparator("\r\n")</li>
483      * <li>withIgnoreSurroundingSpaces(true)</li>
484      * </ul>
485      *
486      * @see Predefined#TDF
487      */
488     // @formatter:off
489     public static final CSVFormat TDF = DEFAULT
490             .withDelimiter(TAB)
491             .withIgnoreSurroundingSpaces();
492     // @formatter:on
493 
494     /**
495      * Returns true if the given character is a line break character.
496      *
497      * @param c
498      *            the character to check
499      *
500      * @return true if <code>c</code> is a line break character
501      */
502     private static boolean isLineBreak(final char c) {
503         return c == LF || c == CR;
504     }
505 
506     /**
507      * Returns true if the given character is a line break character.
508      *
509      * @param c
510      *            the character to check, may be null
511      *
512      * @return true if <code>c</code> is a line break character (and not null)
513      */
514     private static boolean isLineBreak(final Character c) {
515         return c != null && isLineBreak(c.charValue());
516     }
517 
518     /**
519      * Creates a new CSV format with the specified delimiter.
520      *
521      * <p>
522      * Use this method if you want to create a CSVFormat from scratch. All fields but the delimiter will be initialized
523      * with null/false.
524      * </p>
525      *
526      * @param delimiter
527      *            the char used for value separation, must not be a line break character
528      * @return a new CSV format.
529      * @throws IllegalArgumentException
530      *             if the delimiter is a line break character
531      *
532      * @see #DEFAULT
533      * @see #RFC4180
534      * @see #MYSQL
535      * @see #EXCEL
536      * @see #TDF
537      */
538     public static CSVFormat newFormat(final char delimiter) {
539         return new CSVFormat(delimiter, null, null, null, null, false, false, null, null, null, null, false, false,
540                 false, false, false);
541     }
542 
543     /**
544      * Gets one of the predefined formats from {@link CSVFormat.Predefined}.
545      *
546      * @param format
547      *            name
548      * @return one of the predefined formats
549      * @since 1.2
550      */
551     public static CSVFormat valueOf(final String format) {
552         return CSVFormat.Predefined.valueOf(format).getFormat();
553     }
554 
555     private final boolean allowMissingColumnNames;
556 
557     private final Character commentMarker; // null if commenting is disabled
558 
559     private final char delimiter;
560 
561     private final Character escapeCharacter; // null if escaping is disabled
562 
563     private final String[] header; // array of header column names
564 
565     private final String[] headerComments; // array of header comment lines
566 
567     private final boolean ignoreEmptyLines;
568 
569     private final boolean ignoreHeaderCase; // should ignore header names case
570 
571     private final boolean ignoreSurroundingSpaces; // Should leading/trailing spaces be ignored around values?
572 
573     private final String nullString; // the string to be used for null values
574 
575     private final Character quoteCharacter; // null if quoting is disabled
576 
577     private final QuoteMode quoteMode;
578 
579     private final String recordSeparator; // for outputs
580 
581     private final boolean skipHeaderRecord;
582 
583     private final boolean trailingDelimiter;
584 
585     private final boolean trim;
586 
/**
 * Creates a customized CSV format.
 *
 * @param delimiter
 *            the char used for value separation, must not be a line break character
 * @param quoteChar
 *            the Character used as value encapsulation marker, may be {@code null} to disable
 * @param quoteMode
 *            the quote mode
 * @param commentStart
 *            the Character used for comment identification, may be {@code null} to disable
 * @param escape
 *            the Character used to escape special characters in values, may be {@code null} to disable
 * @param ignoreSurroundingSpaces
 *            {@code true} when whitespaces enclosing values should be ignored
 * @param ignoreEmptyLines
 *            {@code true} when the parser should skip empty lines
 * @param recordSeparator
 *            the line separator to use for output
 * @param nullString
 *            the String to convert to and from {@code null}, may be {@code null} for no substitution
 * @param headerComments
 *            the comments to be printed by the Printer before the actual CSV data
 * @param header
 *            the header, may be {@code null}; the array is copied
 * @param skipHeaderRecord
 *            whether to skip the header record
 * @param allowMissingColumnNames
 *            whether missing column names are allowed when parsing the header line
 * @param ignoreHeaderCase
 *            whether header names are accessed ignoring case
 * @param trim
 *            whether to trim leading and trailing blanks
 * @param trailingDelimiter
 *            whether to add a trailing delimiter
 * @throws IllegalArgumentException
 *             if the delimiter is a line break character
 */
private CSVFormat(final char delimiter, final Character quoteChar, final QuoteMode quoteMode,
        final Character commentStart, final Character escape, final boolean ignoreSurroundingSpaces,
        final boolean ignoreEmptyLines, final String recordSeparator, final String nullString,
        final Object[] headerComments, final String[] header, final boolean skipHeaderRecord,
        final boolean allowMissingColumnNames, final boolean ignoreHeaderCase, final boolean trim,
        final boolean trailingDelimiter) {
    this.delimiter = delimiter;
    this.quoteCharacter = quoteChar;
    this.quoteMode = quoteMode;
    this.commentMarker = commentStart;
    this.escapeCharacter = escape;
    this.ignoreSurroundingSpaces = ignoreSurroundingSpaces;
    this.allowMissingColumnNames = allowMissingColumnNames;
    this.ignoreEmptyLines = ignoreEmptyLines;
    this.recordSeparator = recordSeparator;
    this.nullString = nullString;
    this.headerComments = toStringArray(headerComments);
    // Defensive copy, so that later changes to the caller's array cannot alter this instance.
    this.header = header == null ? null : header.clone();
    this.skipHeaderRecord = skipHeaderRecord;
    this.ignoreHeaderCase = ignoreHeaderCase;
    this.trailingDelimiter = trailingDelimiter;
    this.trim = trim;
    // Runs last, once every field has been assigned.
    validate();
}
649 
650     @Override
651     public boolean equals(final Object obj) {
652         if (this == obj) {
653             return true;
654         }
655         if (obj == null) {
656             return false;
657         }
658         if (getClass() != obj.getClass()) {
659             return false;
660         }
661 
662         final CSVFormat other = (CSVFormat) obj;
663         if (delimiter != other.delimiter) {
664             return false;
665         }
666         if (quoteMode != other.quoteMode) {
667             return false;
668         }
669         if (quoteCharacter == null) {
670             if (other.quoteCharacter != null) {
671                 return false;
672             }
673         } else if (!quoteCharacter.equals(other.quoteCharacter)) {
674             return false;
675         }
676         if (commentMarker == null) {
677             if (other.commentMarker != null) {
678                 return false;
679             }
680         } else if (!commentMarker.equals(other.commentMarker)) {
681             return false;
682         }
683         if (escapeCharacter == null) {
684             if (other.escapeCharacter != null) {
685                 return false;
686             }
687         } else if (!escapeCharacter.equals(other.escapeCharacter)) {
688             return false;
689         }
690         if (nullString == null) {
691             if (other.nullString != null) {
692                 return false;
693             }
694         } else if (!nullString.equals(other.nullString)) {
695             return false;
696         }
697         if (!Arrays.equals(header, other.header)) {
698             return false;
699         }
700         if (ignoreSurroundingSpaces != other.ignoreSurroundingSpaces) {
701             return false;
702         }
703         if (ignoreEmptyLines != other.ignoreEmptyLines) {
704             return false;
705         }
706         if (skipHeaderRecord != other.skipHeaderRecord) {
707             return false;
708         }
709         if (recordSeparator == null) {
710             if (other.recordSeparator != null) {
711                 return false;
712             }
713         } else if (!recordSeparator.equals(other.recordSeparator)) {
714             return false;
715         }
716         return true;
717     }
718 
719     /**
720      * Formats the specified values.
721      *
722      * @param values
723      *            the values to format
724      * @return the formatted values
725      */
726     public String format(final Object... values) {
727         final StringWriter out = new StringWriter();
728         try (final CSVPrinter csvPrinter = new CSVPrinter(out, this)) {
729             csvPrinter.printRecord(values);
730             return out.toString().trim();
731         } catch (final IOException e) {
732             // should not happen because a StringWriter does not do IO.
733             throw new IllegalStateException(e);
734         }
735     }
736 
737     /**
738      * Specifies whether missing column names are allowed when parsing the header line.
739      *
740      * @return {@code true} if missing column names are allowed when parsing the header line, {@code false} to throw an
741      *         {@link IllegalArgumentException}.
742      */
743     public boolean getAllowMissingColumnNames() {
744         return allowMissingColumnNames;
745     }
746 
747     /**
748      * Returns the character marking the start of a line comment.
749      *
750      * @return the comment start marker, may be {@code null}
751      */
752     public Character getCommentMarker() {
753         return commentMarker;
754     }
755 
756     /**
757      * Returns the character delimiting the values (typically ';', ',' or '\t').
758      *
759      * @return the delimiter character
760      */
761     public char getDelimiter() {
762         return delimiter;
763     }
764 
765     /**
766      * Returns the escape character.
767      *
768      * @return the escape character, may be {@code null}
769      */
770     public Character getEscapeCharacter() {
771         return escapeCharacter;
772     }
773 
774     /**
775      * Returns a copy of the header array.
776      *
777      * @return a copy of the header array; {@code null} if disabled, the empty array if to be read from the file
778      */
779     public String[] getHeader() {
780         return header != null ? header.clone() : null;
781     }
782 
783     /**
784      * Returns a copy of the header comment array.
785      *
786      * @return a copy of the header comment array; {@code null} if disabled.
787      */
788     public String[] getHeaderComments() {
789         return headerComments != null ? headerComments.clone() : null;
790     }
791 
792     /**
793      * Specifies whether empty lines between records are ignored when parsing input.
794      *
795      * @return {@code true} if empty lines between records are ignored, {@code false} if they are turned into empty
796      *         records.
797      */
798     public boolean getIgnoreEmptyLines() {
799         return ignoreEmptyLines;
800     }
801 
802     /**
803      * Specifies whether header names will be accessed ignoring case.
804      *
805      * @return {@code true} if header names cases are ignored, {@code false} if they are case sensitive.
806      * @since 1.3
807      */
808     public boolean getIgnoreHeaderCase() {
809         return ignoreHeaderCase;
810     }
811 
812     /**
813      * Specifies whether spaces around values are ignored when parsing input.
814      *
815      * @return {@code true} if spaces around values are ignored, {@code false} if they are treated as part of the value.
816      */
817     public boolean getIgnoreSurroundingSpaces() {
818         return ignoreSurroundingSpaces;
819     }
820 
821     /**
822      * Gets the String to convert to and from {@code null}.
823      * <ul>
824      * <li><strong>Reading:</strong> Converts strings equal to the given {@code nullString} to {@code null} when reading
825      * records.</li>
826      * <li><strong>Writing:</strong> Writes {@code null} as the given {@code nullString} when writing records.</li>
827      * </ul>
828      *
829      * @return the String to convert to and from {@code null}. No substitution occurs if {@code null}
830      */
831     public String getNullString() {
832         return nullString;
833     }
834 
835     /**
836      * Returns the character used to encapsulate values containing special characters.
837      *
838      * @return the quoteChar character, may be {@code null}
839      */
840     public Character getQuoteCharacter() {
841         return quoteCharacter;
842     }
843 
844     /**
845      * Returns the quote policy output fields.
846      *
847      * @return the quote policy
848      */
849     public QuoteMode getQuoteMode() {
850         return quoteMode;
851     }
852 
853     /**
854      * Returns the record separator delimiting output records.
855      *
856      * @return the record separator
857      */
858     public String getRecordSeparator() {
859         return recordSeparator;
860     }
861 
862     /**
863      * Returns whether to skip the header record.
864      *
865      * @return whether to skip the header record.
866      */
867     public boolean getSkipHeaderRecord() {
868         return skipHeaderRecord;
869     }
870 
871     /**
872      * Returns whether to add a trailing delimiter.
873      *
874      * @return whether to add a trailing delimiter.
875      * @since 1.3
876      */
877     public boolean getTrailingDelimiter() {
878         return trailingDelimiter;
879     }
880 
881     /**
882      * Returns whether to trim leading and trailing blanks.
883      *
884      * @return whether to trim leading and trailing blanks.
885      */
886     public boolean getTrim() {
887         return trim;
888     }
889 
890     @Override
891     public int hashCode() {
892         final int prime = 31;
893         int result = 1;
894 
895         result = prime * result + delimiter;
896         result = prime * result + ((quoteMode == null) ? 0 : quoteMode.hashCode());
897         result = prime * result + ((quoteCharacter == null) ? 0 : quoteCharacter.hashCode());
898         result = prime * result + ((commentMarker == null) ? 0 : commentMarker.hashCode());
899         result = prime * result + ((escapeCharacter == null) ? 0 : escapeCharacter.hashCode());
900         result = prime * result + ((nullString == null) ? 0 : nullString.hashCode());
901         result = prime * result + (ignoreSurroundingSpaces ? 1231 : 1237);
902         result = prime * result + (ignoreHeaderCase ? 1231 : 1237);
903         result = prime * result + (ignoreEmptyLines ? 1231 : 1237);
904         result = prime * result + (skipHeaderRecord ? 1231 : 1237);
905         result = prime * result + ((recordSeparator == null) ? 0 : recordSeparator.hashCode());
906         result = prime * result + Arrays.hashCode(header);
907         return result;
908     }
909 
910     /**
911      * Specifies whether comments are supported by this format.
912      *
913      * Note that the comment introducer character is only recognized at the start of a line.
914      *
915      * @return {@code true} is comments are supported, {@code false} otherwise
916      */
917     public boolean isCommentMarkerSet() {
918         return commentMarker != null;
919     }
920 
921     /**
922      * Returns whether escape are being processed.
923      *
924      * @return {@code true} if escapes are processed
925      */
926     public boolean isEscapeCharacterSet() {
927         return escapeCharacter != null;
928     }
929 
930     /**
931      * Returns whether a nullString has been defined.
932      *
933      * @return {@code true} if a nullString is defined
934      */
935     public boolean isNullStringSet() {
936         return nullString != null;
937     }
938 
939     /**
940      * Returns whether a quoteChar has been defined.
941      *
942      * @return {@code true} if a quoteChar is defined
943      */
944     public boolean isQuoteCharacterSet() {
945         return quoteCharacter != null;
946     }
947 
948     /**
949      * Parses the specified content.
950      *
951      * <p>
952      * See also the various static parse methods on {@link CSVParser}.
953      * </p>
954      *
955      * @param in
956      *            the input stream
957      * @return a parser over a stream of {@link CSVRecord}s.
958      * @throws IOException
959      *             If an I/O error occurs
960      */
961     public CSVParser parse(final Reader in) throws IOException {
962         return new CSVParser(in, this);
963     }
964 
965     /**
966      * Prints to the specified output.
967      *
968      * <p>
969      * See also {@link CSVPrinter}.
970      * </p>
971      *
972      * @param out
973      *            the output.
974      * @return a printer to an output.
975      * @throws IOException
976      *             thrown if the optional header cannot be printed.
977      */
978     public CSVPrinter print(final Appendable out) throws IOException {
979         return new CSVPrinter(out, this);
980     }
981 
982     /**
983      * Prints to the {@link System#out}.
984      *
985      * <p>
986      * See also {@link CSVPrinter}.
987      * </p>
988      *
989      * @return a printer to {@link System#out}.
990      * @throws IOException
991      *             thrown if the optional header cannot be printed.
992      * @since 1.5
993      */
994     public CSVPrinter printer() throws IOException {
995         return new CSVPrinter(System.out, this);
996     }
997 
998     /**
999      * Prints to the specified output.
1000      *
1001      * <p>
1002      * See also {@link CSVPrinter}.
1003      * </p>
1004      *
1005      * @param out
1006      *            the output.
1007      * @param charset
1008      *            A charset.
1009      * @return a printer to an output.
1010      * @throws IOException
1011      *             thrown if the optional header cannot be printed.
1012      * @since 1.5
1013      */
1014     @SuppressWarnings("resource")
1015     public CSVPrinter print(final File out, Charset charset) throws IOException {
1016         // The writer will be closed when close() is called.
1017         return new CSVPrinter(new OutputStreamWriter(new FileOutputStream(out), charset), this);
1018     }
1019 
1020     /**
1021      * Prints to the specified output.
1022      *
1023      * <p>
1024      * See also {@link CSVPrinter}.
1025      * </p>
1026      *
1027      * @param out
1028      *            the output.
1029      * @param charset
1030      *            A charset.
1031      * @return a printer to an output.
1032      * @throws IOException
1033      *             thrown if the optional header cannot be printed.
1034      * @since 1.5
1035      */
1036     public CSVPrinter print(final Path out, Charset charset) throws IOException {
1037         return print(Files.newBufferedWriter(out, charset));
1038     }
1039 
1040     /**
1041      * Prints the {@code value} as the next value on the line to {@code out}. The value will be escaped or encapsulated
1042      * as needed. Useful when one wants to avoid creating CSVPrinters.
1043      *
1044      * @param value
1045      *            value to output.
1046      * @param out
1047      *            where to print the value.
1048      * @param newRecord
1049      *            if this a new record.
1050      * @throws IOException
1051      *             If an I/O error occurs.
1052      * @since 1.4
1053      */
1054     public void print(final Object value, final Appendable out, final boolean newRecord) throws IOException {
1055         // null values are considered empty
1056         // Only call CharSequence.toString() if you have to, helps GC-free use cases.
1057         CharSequence charSequence;
1058         if (value == null) {
1059             // https://issues.apache.org/jira/browse/CSV-203
1060             if (null == nullString) {
1061                 charSequence = EMPTY;
1062             } else {
1063                 if (QuoteMode.ALL == quoteMode) {
1064                     charSequence = quoteCharacter + nullString + quoteCharacter;
1065                 } else {
1066                     charSequence = nullString;
1067                 }
1068             }
1069         } else {
1070             charSequence = value instanceof CharSequence ? (CharSequence) value : value.toString();
1071         }
1072         charSequence = getTrim() ? trim(charSequence) : charSequence;
1073         this.print(value, charSequence, 0, charSequence.length(), out, newRecord);
1074     }
1075 
1076     private void print(final Object object, final CharSequence value, final int offset, final int len,
1077             final Appendable out, final boolean newRecord) throws IOException {
1078         if (!newRecord) {
1079             out.append(getDelimiter());
1080         }
1081         if (object == null) {
1082             out.append(value);
1083         } else if (isQuoteCharacterSet()) {
1084             // the original object is needed so can check for Number
1085             printAndQuote(object, value, offset, len, out, newRecord);
1086         } else if (isEscapeCharacterSet()) {
1087             printAndEscape(value, offset, len, out);
1088         } else {
1089             out.append(value, offset, offset + len);
1090         }
1091     }
1092 
1093     /*
1094      * Note: must only be called if escaping is enabled, otherwise will generate NPE
1095      */
1096     private void printAndEscape(final CharSequence value, final int offset, final int len, final Appendable out)
1097             throws IOException {
1098         int start = offset;
1099         int pos = offset;
1100         final int end = offset + len;
1101 
1102         final char delim = getDelimiter();
1103         final char escape = getEscapeCharacter().charValue();
1104 
1105         while (pos < end) {
1106             char c = value.charAt(pos);
1107             if (c == CR || c == LF || c == delim || c == escape) {
1108                 // write out segment up until this char
1109                 if (pos > start) {
1110                     out.append(value, start, pos);
1111                 }
1112                 if (c == LF) {
1113                     c = 'n';
1114                 } else if (c == CR) {
1115                     c = 'r';
1116                 }
1117 
1118                 out.append(escape);
1119                 out.append(c);
1120 
1121                 start = pos + 1; // start on the current char after this one
1122             }
1123 
1124             pos++;
1125         }
1126 
1127         // write last segment
1128         if (pos > start) {
1129             out.append(value, start, pos);
1130         }
1131     }
1132 
1133     /*
1134      * Note: must only be called if quoting is enabled, otherwise will generate NPE
1135      */
1136     // the original object is needed so can check for Number
1137     private void printAndQuote(final Object object, final CharSequence value, final int offset, final int len,
1138             final Appendable out, final boolean newRecord) throws IOException {
1139         boolean quote = false;
1140         int start = offset;
1141         int pos = offset;
1142         final int end = offset + len;
1143 
1144         final char delimChar = getDelimiter();
1145         final char quoteChar = getQuoteCharacter().charValue();
1146 
1147         QuoteMode quoteModePolicy = getQuoteMode();
1148         if (quoteModePolicy == null) {
1149             quoteModePolicy = QuoteMode.MINIMAL;
1150         }
1151         switch (quoteModePolicy) {
1152         case ALL:
1153         case ALL_NON_NULL:
1154             quote = true;
1155             break;
1156         case NON_NUMERIC:
1157             quote = !(object instanceof Number);
1158             break;
1159         case NONE:
1160             // Use the existing escaping code
1161             printAndEscape(value, offset, len, out);
1162             return;
1163         case MINIMAL:
1164             if (len <= 0) {
1165                 // always quote an empty token that is the first
1166                 // on the line, as it may be the only thing on the
1167                 // line. If it were not quoted in that case,
1168                 // an empty line has no tokens.
1169                 if (newRecord) {
1170                     quote = true;
1171                 }
1172             } else {
1173                 char c = value.charAt(pos);
1174 
1175                 // RFC4180 (https://tools.ietf.org/html/rfc4180) TEXTDATA =  %x20-21 / %x23-2B / %x2D-7E
1176                 if (newRecord && (c < 0x20 || c > 0x21 && c < 0x23 || c > 0x2B && c < 0x2D || c > 0x7E)) {
1177                     quote = true;
1178                 } else if (c <= COMMENT) {
1179                     // Some other chars at the start of a value caused the parser to fail, so for now
1180                     // encapsulate if we start in anything less than '#'. We are being conservative
1181                     // by including the default comment char too.
1182                     quote = true;
1183                 } else {
1184                     while (pos < end) {
1185                         c = value.charAt(pos);
1186                         if (c == LF || c == CR || c == quoteChar || c == delimChar) {
1187                             quote = true;
1188                             break;
1189                         }
1190                         pos++;
1191                     }
1192 
1193                     if (!quote) {
1194                         pos = end - 1;
1195                         c = value.charAt(pos);
1196                         // Some other chars at the end caused the parser to fail, so for now
1197                         // encapsulate if we end in anything less than ' '
1198                         if (c <= SP) {
1199                             quote = true;
1200                         }
1201                     }
1202                 }
1203             }
1204 
1205             if (!quote) {
1206                 // no encapsulation needed - write out the original value
1207                 out.append(value, start, end);
1208                 return;
1209             }
1210             break;
1211         default:
1212             throw new IllegalStateException("Unexpected Quote value: " + quoteModePolicy);
1213         }
1214 
1215         if (!quote) {
1216             // no encapsulation needed - write out the original value
1217             out.append(value, start, end);
1218             return;
1219         }
1220 
1221         // we hit something that needed encapsulation
1222         out.append(quoteChar);
1223 
1224         // Pick up where we left off: pos should be positioned on the first character that caused
1225         // the need for encapsulation.
1226         while (pos < end) {
1227             final char c = value.charAt(pos);
1228             if (c == quoteChar) {
1229                 // write out the chunk up until this point
1230 
1231                 // add 1 to the length to write out the encapsulator also
1232                 out.append(value, start, pos + 1);
1233                 // put the next starting position on the encapsulator so we will
1234                 // write it out again with the next string (effectively doubling it)
1235                 start = pos;
1236             }
1237             pos++;
1238         }
1239 
1240         // write the last segment
1241         out.append(value, start, pos);
1242         out.append(quoteChar);
1243     }
1244 
1245     /**
1246      * Outputs the trailing delimiter (if set) followed by the record separator (if set).
1247      *
1248      * @param out
1249      *            where to write
1250      * @throws IOException
1251      *             If an I/O error occurs
1252      * @since 1.4
1253      */
1254     public void println(final Appendable out) throws IOException {
1255         if (getTrailingDelimiter()) {
1256             out.append(getDelimiter());
1257         }
1258         if (recordSeparator != null) {
1259             out.append(recordSeparator);
1260         }
1261     }
1262 
1263     /**
1264      * Prints the given {@code values} to {@code out} as a single record of delimiter separated values followed by the
1265      * record separator.
1266      *
1267      * <p>
1268      * The values will be quoted if needed. Quotes and new-line characters will be escaped. This method adds the record
1269      * separator to the output after printing the record, so there is no need to call {@link #println(Appendable)}.
1270      * </p>
1271      *
1272      * @param out
1273      *            where to write.
1274      * @param values
1275      *            values to output.
1276      * @throws IOException
1277      *             If an I/O error occurs.
1278      * @since 1.4
1279      */
1280     public void printRecord(final Appendable out, final Object... values) throws IOException {
1281         for (int i = 0; i < values.length; i++) {
1282             print(values[i], out, i == 0);
1283         }
1284         println(out);
1285     }
1286 
1287     @Override
1288     public String toString() {
1289         final StringBuilder sb = new StringBuilder();
1290         sb.append("Delimiter=<").append(delimiter).append('>');
1291         if (isEscapeCharacterSet()) {
1292             sb.append(' ');
1293             sb.append("Escape=<").append(escapeCharacter).append('>');
1294         }
1295         if (isQuoteCharacterSet()) {
1296             sb.append(' ');
1297             sb.append("QuoteChar=<").append(quoteCharacter).append('>');
1298         }
1299         if (isCommentMarkerSet()) {
1300             sb.append(' ');
1301             sb.append("CommentStart=<").append(commentMarker).append('>');
1302         }
1303         if (isNullStringSet()) {
1304             sb.append(' ');
1305             sb.append("NullString=<").append(nullString).append('>');
1306         }
1307         if (recordSeparator != null) {
1308             sb.append(' ');
1309             sb.append("RecordSeparator=<").append(recordSeparator).append('>');
1310         }
1311         if (getIgnoreEmptyLines()) {
1312             sb.append(" EmptyLines:ignored");
1313         }
1314         if (getIgnoreSurroundingSpaces()) {
1315             sb.append(" SurroundingSpaces:ignored");
1316         }
1317         if (getIgnoreHeaderCase()) {
1318             sb.append(" IgnoreHeaderCase:ignored");
1319         }
1320         sb.append(" SkipHeaderRecord:").append(skipHeaderRecord);
1321         if (headerComments != null) {
1322             sb.append(' ');
1323             sb.append("HeaderComments:").append(Arrays.toString(headerComments));
1324         }
1325         if (header != null) {
1326             sb.append(' ');
1327             sb.append("Header:").append(Arrays.toString(header));
1328         }
1329         return sb.toString();
1330     }
1331 
1332     private String[] toStringArray(final Object[] values) {
1333         if (values == null) {
1334             return null;
1335         }
1336         final String[] strings = new String[values.length];
1337         for (int i = 0; i < values.length; i++) {
1338             final Object value = values[i];
1339             strings[i] = value == null ? null : value.toString();
1340         }
1341         return strings;
1342     }
1343 
1344     private CharSequence trim(final CharSequence charSequence) {
1345         if (charSequence instanceof String) {
1346             return ((String) charSequence).trim();
1347         }
1348         final int count = charSequence.length();
1349         int len = count;
1350         int pos = 0;
1351 
1352         while (pos < len && charSequence.charAt(pos) <= SP) {
1353             pos++;
1354         }
1355         while (pos < len && charSequence.charAt(len - 1) <= SP) {
1356             len--;
1357         }
1358         return pos > 0 || len < count ? charSequence.subSequence(pos, len) : charSequence;
1359     }
1360 
1361     /**
1362      * Verifies the consistency of the parameters and throws an IllegalArgumentException if necessary.
1363      *
1364      * @throws IllegalArgumentException
1365      */
1366     private void validate() throws IllegalArgumentException {
1367         if (isLineBreak(delimiter)) {
1368             throw new IllegalArgumentException("The delimiter cannot be a line break");
1369         }
1370 
1371         if (quoteCharacter != null && delimiter == quoteCharacter.charValue()) {
1372             throw new IllegalArgumentException(
1373                     "The quoteChar character and the delimiter cannot be the same ('" + quoteCharacter + "')");
1374         }
1375 
1376         if (escapeCharacter != null && delimiter == escapeCharacter.charValue()) {
1377             throw new IllegalArgumentException(
1378                     "The escape character and the delimiter cannot be the same ('" + escapeCharacter + "')");
1379         }
1380 
1381         if (commentMarker != null && delimiter == commentMarker.charValue()) {
1382             throw new IllegalArgumentException(
1383                     "The comment start character and the delimiter cannot be the same ('" + commentMarker + "')");
1384         }
1385 
1386         if (quoteCharacter != null && quoteCharacter.equals(commentMarker)) {
1387             throw new IllegalArgumentException(
1388                     "The comment start character and the quoteChar cannot be the same ('" + commentMarker + "')");
1389         }
1390 
1391         if (escapeCharacter != null && escapeCharacter.equals(commentMarker)) {
1392             throw new IllegalArgumentException(
1393                     "The comment start and the escape character cannot be the same ('" + commentMarker + "')");
1394         }
1395 
1396         if (escapeCharacter == null && quoteMode == QuoteMode.NONE) {
1397             throw new IllegalArgumentException("No quotes mode set but no escape character is set");
1398         }
1399 
1400         // validate header
1401         if (header != null) {
1402             final Set<String> dupCheck = new HashSet<>();
1403             for (final String hdr : header) {
1404                 if (!dupCheck.add(hdr)) {
1405                     throw new IllegalArgumentException(
1406                             "The header contains a duplicate entry: '" + hdr + "' in " + Arrays.toString(header));
1407                 }
1408             }
1409         }
1410     }
1411 
1412     /**
1413      * Returns a new {@code CSVFormat} with the missing column names behavior of the format set to {@code true}
1414      *
1415      * @return A new CSVFormat that is equal to this but with the specified missing column names behavior.
1416      * @see #withAllowMissingColumnNames(boolean)
1417      * @since 1.1
1418      */
1419     public CSVFormat withAllowMissingColumnNames() {
1420         return this.withAllowMissingColumnNames(true);
1421     }
1422 
1423     /**
1424      * Returns a new {@code CSVFormat} with the missing column names behavior of the format set to the given value.
1425      *
1426      * @param allowMissingColumnNames
1427      *            the missing column names behavior, {@code true} to allow missing column names in the header line,
1428      *            {@code false} to cause an {@link IllegalArgumentException} to be thrown.
1429      * @return A new CSVFormat that is equal to this but with the specified missing column names behavior.
1430      */
1431     public CSVFormat withAllowMissingColumnNames(final boolean allowMissingColumnNames) {
1432         return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
1433                 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
1434                 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
1435     }
1436 
1437     /**
1438      * Returns a new {@code CSVFormat} with the comment start marker of the format set to the specified character.
1439      *
1440      * Note that the comment start character is only recognized at the start of a line.
1441      *
1442      * @param commentMarker
1443      *            the comment start marker
1444      * @return A new CSVFormat that is equal to this one but with the specified character as the comment start marker
1445      * @throws IllegalArgumentException
1446      *             thrown if the specified character is a line break
1447      */
1448     public CSVFormat withCommentMarker(final char commentMarker) {
1449         return withCommentMarker(Character.valueOf(commentMarker));
1450     }
1451 
1452     /**
1453      * Returns a new {@code CSVFormat} with the comment start marker of the format set to the specified character.
1454      *
1455      * Note that the comment start character is only recognized at the start of a line.
1456      *
1457      * @param commentMarker
1458      *            the comment start marker, use {@code null} to disable
1459      * @return A new CSVFormat that is equal to this one but with the specified character as the comment start marker
1460      * @throws IllegalArgumentException
1461      *             thrown if the specified character is a line break
1462      */
1463     public CSVFormat withCommentMarker(final Character commentMarker) {
1464         if (isLineBreak(commentMarker)) {
1465             throw new IllegalArgumentException("The comment start marker character cannot be a line break");
1466         }
1467         return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
1468                 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
1469                 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
1470     }
1471 
1472     /**
1473      * Returns a new {@code CSVFormat} with the delimiter of the format set to the specified character.
1474      *
1475      * @param delimiter
1476      *            the delimiter character
1477      * @return A new CSVFormat that is equal to this with the specified character as delimiter
1478      * @throws IllegalArgumentException
1479      *             thrown if the specified character is a line break
1480      */
1481     public CSVFormat withDelimiter(final char delimiter) {
1482         if (isLineBreak(delimiter)) {
1483             throw new IllegalArgumentException("The delimiter cannot be a line break");
1484         }
1485         return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
1486                 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
1487                 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
1488     }
1489 
1490     /**
1491      * Returns a new {@code CSVFormat} with the escape character of the format set to the specified character.
1492      *
1493      * @param escape
1494      *            the escape character
1495      * @return A new CSVFormat that is equal to his but with the specified character as the escape character
1496      * @throws IllegalArgumentException
1497      *             thrown if the specified character is a line break
1498      */
1499     public CSVFormat withEscape(final char escape) {
1500         return withEscape(Character.valueOf(escape));
1501     }
1502 
1503     /**
1504      * Returns a new {@code CSVFormat} with the escape character of the format set to the specified character.
1505      *
1506      * @param escape
1507      *            the escape character, use {@code null} to disable
1508      * @return A new CSVFormat that is equal to this but with the specified character as the escape character
1509      * @throws IllegalArgumentException
1510      *             thrown if the specified character is a line break
1511      */
1512     public CSVFormat withEscape(final Character escape) {
1513         if (isLineBreak(escape)) {
1514             throw new IllegalArgumentException("The escape character cannot be a line break");
1515         }
1516         return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escape, ignoreSurroundingSpaces,
1517                 ignoreEmptyLines, recordSeparator, nullString, headerComments, header, skipHeaderRecord,
1518                 allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
1519     }
1520 
1521     /**
1522      * Returns a new {@code CSVFormat} using the first record as header.
1523      *
1524      * <p>
1525      * Calling this method is equivalent to calling:
1526      * </p>
1527      *
1528      * <pre>
1529      * CSVFormat format = aFormat.withHeader().withSkipHeaderRecord();
1530      * </pre>
1531      *
1532      * @return A new CSVFormat that is equal to this but using the first record as header.
1533      * @see #withSkipHeaderRecord(boolean)
1534      * @see #withHeader(String...)
1535      * @since 1.3
1536      */
1537     public CSVFormat withFirstRecordAsHeader() {
1538         return withHeader().withSkipHeaderRecord();
1539     }
1540 
1541     /**
1542      * Returns a new {@code CSVFormat} with the header of the format defined by the enum class.
1543      *
1544      * <p>
1545      * Example:
1546      * </p>
1547      * <pre>
1548      * public enum Header {
1549      *     Name, Email, Phone
1550      * }
1551      *
1552      * CSVFormat format = aformat.withHeader(Header.class);
1553      * </pre>
1554      * <p>
1555      * The header is also used by the {@link CSVPrinter}.
1556      * </p>
1557      *
1558      * @param headerEnum
1559      *            the enum defining the header, {@code null} if disabled, empty if parsed automatically, user specified
1560      *            otherwise.
1561      *
1562      * @return A new CSVFormat that is equal to this but with the specified header
1563      * @see #withHeader(String...)
1564      * @see #withSkipHeaderRecord(boolean)
1565      * @since 1.3
1566      */
1567     public CSVFormat withHeader(final Class<? extends Enum<?>> headerEnum) {
1568         String[] header = null;
1569         if (headerEnum != null) {
1570             final Enum<?>[] enumValues = headerEnum.getEnumConstants();
1571             header = new String[enumValues.length];
1572             for (int i = 0; i < enumValues.length; i++) {
1573                 header[i] = enumValues[i].name();
1574             }
1575         }
1576         return withHeader(header);
1577     }
1578 
1579     /**
1580      * Returns a new {@code CSVFormat} with the header of the format set from the result set metadata. The header can
1581      * either be parsed automatically from the input file with:
1582      *
1583      * <pre>
1584      * CSVFormat format = aformat.withHeader();
1585      * </pre>
1586      *
1587      * or specified manually with:
1588      *
1589      * <pre>
1590      * CSVFormat format = aformat.withHeader(resultSet);
1591      * </pre>
1592      * <p>
1593      * The header is also used by the {@link CSVPrinter}.
1594      * </p>
1595      *
1596      * @param resultSet
1597      *            the resultSet for the header, {@code null} if disabled, empty if parsed automatically, user specified
1598      *            otherwise.
1599      *
1600      * @return A new CSVFormat that is equal to this but with the specified header
1601      * @throws SQLException
1602      *             SQLException if a database access error occurs or this method is called on a closed result set.
1603      * @since 1.1
1604      */
1605     public CSVFormat withHeader(final ResultSet resultSet) throws SQLException {
1606         return withHeader(resultSet != null ? resultSet.getMetaData() : null);
1607     }
1608 
1609     /**
1610      * Returns a new {@code CSVFormat} with the header of the format set from the result set metadata. The header can
1611      * either be parsed automatically from the input file with:
1612      *
1613      * <pre>
1614      * CSVFormat format = aformat.withHeader();
1615      * </pre>
1616      *
1617      * or specified manually with:
1618      *
1619      * <pre>
1620      * CSVFormat format = aformat.withHeader(metaData);
1621      * </pre>
1622      * <p>
1623      * The header is also used by the {@link CSVPrinter}.
1624      * </p>
1625      *
1626      * @param metaData
1627      *            the metaData for the header, {@code null} if disabled, empty if parsed automatically, user specified
1628      *            otherwise.
1629      *
1630      * @return A new CSVFormat that is equal to this but with the specified header
1631      * @throws SQLException
1632      *             SQLException if a database access error occurs or this method is called on a closed result set.
1633      * @since 1.1
1634      */
1635     public CSVFormat withHeader(final ResultSetMetaData metaData) throws SQLException {
1636         String[] labels = null;
1637         if (metaData != null) {
1638             final int columnCount = metaData.getColumnCount();
1639             labels = new String[columnCount];
1640             for (int i = 0; i < columnCount; i++) {
1641                 labels[i] = metaData.getColumnLabel(i + 1);
1642             }
1643         }
1644         return withHeader(labels);
1645     }
1646 
1647     /**
1648      * Returns a new {@code CSVFormat} with the header of the format set to the given values. The header can either be
1649      * parsed automatically from the input file with:
1650      *
1651      * <pre>
1652      * CSVFormat format = aformat.withHeader();
1653      * </pre>
1654      *
1655      * or specified manually with:
1656      *
1657      * <pre>
1658      * CSVFormat format = aformat.withHeader(&quot;name&quot;, &quot;email&quot;, &quot;phone&quot;);
1659      * </pre>
1660      * <p>
1661      * The header is also used by the {@link CSVPrinter}.
1662      * </p>
1663      *
1664      * @param header
1665      *            the header, {@code null} if disabled, empty if parsed automatically, user specified otherwise.
1666      *
1667      * @return A new CSVFormat that is equal to this but with the specified header
1668      * @see #withSkipHeaderRecord(boolean)
1669      */
1670     public CSVFormat withHeader(final String... header) {
1671         return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
1672                 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
1673                 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
1674     }
1675 
1676     /**
1677      * Returns a new {@code CSVFormat} with the header comments of the format set to the given values. The comments will
1678      * be printed first, before the headers. This setting is ignored by the parser.
1679      *
1680      * <pre>
1681      * CSVFormat format = aformat.withHeaderComments(&quot;Generated by Apache Commons CSV 1.1.&quot;, new Date());
1682      * </pre>
1683      *
1684      * @param headerComments
1685      *            the headerComments which will be printed by the Printer before the actual CSV data.
1686      *
1687      * @return A new CSVFormat that is equal to this but with the specified header comments
1688      * @see #withSkipHeaderRecord(boolean)
1689      * @since 1.1
1690      */
1691     public CSVFormat withHeaderComments(final Object... headerComments) {
1692         return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
1693                 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
1694                 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
1695     }
1696 
1697     /**
1698      * Returns a new {@code CSVFormat} with the empty line skipping behavior of the format set to {@code true}.
1699      *
1700      * @return A new CSVFormat that is equal to this but with the specified empty line skipping behavior.
1701      * @see #withIgnoreEmptyLines(boolean)
1702      * @since 1.1
1703      */
1704     public CSVFormat withIgnoreEmptyLines() {
1705         return this.withIgnoreEmptyLines(true);
1706     }
1707 
1708     /**
1709      * Returns a new {@code CSVFormat} with the empty line skipping behavior of the format set to the given value.
1710      *
1711      * @param ignoreEmptyLines
1712      *            the empty line skipping behavior, {@code true} to ignore the empty lines between the records,
1713      *            {@code false} to translate empty lines to empty records.
1714      * @return A new CSVFormat that is equal to this but with the specified empty line skipping behavior.
1715      */
1716     public CSVFormat withIgnoreEmptyLines(final boolean ignoreEmptyLines) {
1717         return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
1718                 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
1719                 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
1720     }
1721 
1722     /**
1723      * Returns a new {@code CSVFormat} with the header ignore case behavior set to {@code true}.
1724      *
1725      * @return A new CSVFormat that will ignore the case of header names.
1726      * @see #withIgnoreHeaderCase(boolean)
1727      * @since 1.3
1728      */
1729     public CSVFormat withIgnoreHeaderCase() {
1730         return this.withIgnoreHeaderCase(true);
1731     }
1732 
1733     /**
1734      * Returns a new {@code CSVFormat} with whether header names should be accessed ignoring case.
1735      *
1736      * @param ignoreHeaderCase
1737      *            the case mapping behavior, {@code true} to access name/values ignoring case, {@code false} to
1738      *            leave the mapping as is.
1739      * @return A new CSVFormat that will ignore the case of header names if specified as {@code true}
1740      * @since 1.3
1741      */
1742     public CSVFormat withIgnoreHeaderCase(final boolean ignoreHeaderCase) {
1743         return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
1744                 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
1745                 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
1746     }
1747 
1748     /**
1749      * Returns a new {@code CSVFormat} with the trimming behavior of the format set to {@code true}.
1750      *
1751      * @return A new CSVFormat that is equal to this but with the specified trimming behavior.
1752      * @see #withIgnoreSurroundingSpaces(boolean)
1753      * @since 1.1
1754      */
1755     public CSVFormat withIgnoreSurroundingSpaces() {
1756         return this.withIgnoreSurroundingSpaces(true);
1757     }
1758 
1759     /**
1760      * Returns a new {@code CSVFormat} with the trimming behavior of the format set to the given value.
1761      *
1762      * @param ignoreSurroundingSpaces
1763      *            the trimming behavior, {@code true} to remove the surrounding spaces, {@code false} to leave the
1764      *            spaces as is.
1765      * @return A new CSVFormat that is equal to this but with the specified trimming behavior.
1766      */
1767     public CSVFormat withIgnoreSurroundingSpaces(final boolean ignoreSurroundingSpaces) {
1768         return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
1769                 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
1770                 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
1771     }
1772 
1773     /**
1774      * Returns a new {@code CSVFormat} with conversions to and from null for strings on input and output.
1775      * <ul>
1776      * <li><strong>Reading:</strong> Converts strings equal to the given {@code nullString} to {@code null} when reading
1777      * records.</li>
1778      * <li><strong>Writing:</strong> Writes {@code null} as the given {@code nullString} when writing records.</li>
1779      * </ul>
1780      *
1781      * @param nullString
1782      *            the String to convert to and from {@code null}. No substitution occurs if {@code null}
1783      *
1784      * @return A new CSVFormat that is equal to this but with the specified null conversion string.
1785      */
1786     public CSVFormat withNullString(final String nullString) {
1787         return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
1788                 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
1789                 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
1790     }
1791 
1792     /**
1793      * Returns a new {@code CSVFormat} with the quoteChar of the format set to the specified character.
1794      *
1795      * @param quoteChar
1796      *            the quoteChar character
1797      * @return A new CSVFormat that is equal to this but with the specified character as quoteChar
1798      * @throws IllegalArgumentException
1799      *             thrown if the specified character is a line break
1800      */
1801     public CSVFormat withQuote(final char quoteChar) {
1802         return withQuote(Character.valueOf(quoteChar));
1803     }
1804 
1805     /**
1806      * Returns a new {@code CSVFormat} with the quoteChar of the format set to the specified character.
1807      *
1808      * @param quoteChar
1809      *            the quoteChar character, use {@code null} to disable
1810      * @return A new CSVFormat that is equal to this but with the specified character as quoteChar
1811      * @throws IllegalArgumentException
1812      *             thrown if the specified character is a line break
1813      */
1814     public CSVFormat withQuote(final Character quoteChar) {
1815         if (isLineBreak(quoteChar)) {
1816             throw new IllegalArgumentException("The quoteChar cannot be a line break");
1817         }
1818         return new CSVFormat(delimiter, quoteChar, quoteMode, commentMarker, escapeCharacter, ignoreSurroundingSpaces,
1819                 ignoreEmptyLines, recordSeparator, nullString, headerComments, header, skipHeaderRecord,
1820                 allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
1821     }
1822 
1823     /**
1824      * Returns a new {@code CSVFormat} with the output quote policy of the format set to the specified value.
1825      *
1826      * @param quoteModePolicy
1827      *            the quote policy to use for output.
1828      *
1829      * @return A new CSVFormat that is equal to this but with the specified quote policy
1830      */
1831     public CSVFormat withQuoteMode(final QuoteMode quoteModePolicy) {
1832         return new CSVFormat(delimiter, quoteCharacter, quoteModePolicy, commentMarker, escapeCharacter,
1833                 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
1834                 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
1835     }
1836 
1837     /**
1838      * Returns a new {@code CSVFormat} with the record separator of the format set to the specified character.
1839      *
1840      * <p>
1841      * <strong>Note:</strong> This setting is only used during printing and does not affect parsing. Parsing currently
1842      * only works for inputs with '\n', '\r' and "\r\n"
1843      * </p>
1844      *
1845      * @param recordSeparator
1846      *            the record separator to use for output.
1847      *
1848      * @return A new CSVFormat that is equal to this but with the specified output record separator
1849      */
1850     public CSVFormat withRecordSeparator(final char recordSeparator) {
1851         return withRecordSeparator(String.valueOf(recordSeparator));
1852     }
1853 
1854     /**
1855      * Returns a new {@code CSVFormat} with the record separator of the format set to the specified String.
1856      *
1857      * <p>
1858      * <strong>Note:</strong> This setting is only used during printing and does not affect parsing. Parsing currently
1859      * only works for inputs with '\n', '\r' and "\r\n"
1860      * </p>
1861      *
1862      * @param recordSeparator
1863      *            the record separator to use for output.
1864      *
1865      * @return A new CSVFormat that is equal to this but with the specified output record separator
1866      * @throws IllegalArgumentException
1867      *             if recordSeparator is none of CR, LF or CRLF
1868      */
1869     public CSVFormat withRecordSeparator(final String recordSeparator) {
1870         return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
1871                 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
1872                 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
1873     }
1874 
1875     /**
1876      * Returns a new {@code CSVFormat} with skipping the header record set to {@code true}.
1877      *
1878      * @return A new CSVFormat that is equal to this but with the specified skipHeaderRecord setting.
1879      * @see #withSkipHeaderRecord(boolean)
1880      * @see #withHeader(String...)
1881      * @since 1.1
1882      */
1883     public CSVFormat withSkipHeaderRecord() {
1884         return this.withSkipHeaderRecord(true);
1885     }
1886 
1887     /**
1888      * Returns a new {@code CSVFormat} with whether to skip the header record.
1889      *
1890      * @param skipHeaderRecord
1891      *            whether to skip the header record.
1892      *
1893      * @return A new CSVFormat that is equal to this but with the specified skipHeaderRecord setting.
1894      * @see #withHeader(String...)
1895      */
1896     public CSVFormat withSkipHeaderRecord(final boolean skipHeaderRecord) {
1897         return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
1898                 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
1899                 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
1900     }
1901 
1902     /**
1903      * Returns a new {@code CSVFormat} to add a trailing delimiter.
1904      *
1905      * @return A new CSVFormat that is equal to this but with the trailing delimiter setting.
1906      * @since 1.3
1907      */
1908     public CSVFormat withTrailingDelimiter() {
1909         return withTrailingDelimiter(true);
1910     }
1911 
1912     /**
1913      * Returns a new {@code CSVFormat} with whether to add a trailing delimiter.
1914      *
1915      * @param trailingDelimiter
1916      *            whether to add a trailing delimiter.
1917      *
1918      * @return A new CSVFormat that is equal to this but with the specified trailing delimiter setting.
1919      * @since 1.3
1920      */
1921     public CSVFormat withTrailingDelimiter(final boolean trailingDelimiter) {
1922         return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
1923                 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
1924                 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
1925     }
1926 
1927     /**
1928      * Returns a new {@code CSVFormat} to trim leading and trailing blanks.
1929      *
1930      * @return A new CSVFormat that is equal to this but with the trim setting on.
1931      * @since 1.3
1932      */
1933     public CSVFormat withTrim() {
1934         return withTrim(true);
1935     }
1936 
1937     /**
1938      * Returns a new {@code CSVFormat} with whether to trim leading and trailing blanks.
1939      *
1940      * @param trim
1941      *            whether to trim leading and trailing blanks.
1942      *
1943      * @return A new CSVFormat that is equal to this but with the specified trim setting.
1944      * @since 1.3
1945      */
1946     public CSVFormat withTrim(final boolean trim) {
1947         return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
1948                 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
1949                 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
1950     }
1951 }