001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.csv;
019
020import static org.apache.commons.csv.Constants.BACKSLASH;
021import static org.apache.commons.csv.Constants.COMMA;
022import static org.apache.commons.csv.Constants.CR;
023import static org.apache.commons.csv.Constants.CRLF;
024import static org.apache.commons.csv.Constants.DOUBLE_QUOTE_CHAR;
025import static org.apache.commons.csv.Constants.LF;
026import static org.apache.commons.csv.Constants.TAB;
027
028import java.io.IOException;
029import java.io.Reader;
030import java.io.Serializable;
031import java.io.StringWriter;
032import java.sql.ResultSet;
033import java.sql.ResultSetMetaData;
034import java.sql.SQLException;
035import java.util.Arrays;
036import java.util.HashSet;
037import java.util.Set;
038
039/**
040 * Specifies the format of a CSV file and parses input.
041 *
042 * <h2>Using predefined formats</h2>
043 *
044 * <p>
045 * You can use one of the predefined formats:
046 * </p>
047 *
048 * <ul>
049 * <li>{@link #DEFAULT}</li>
050 * <li>{@link #EXCEL}</li>
051 * <li>{@link #MYSQL}</li>
052 * <li>{@link #RFC4180}</li>
053 * <li>{@link #TDF}</li>
054 * </ul>
055 *
056 * <p>
057 * For example:
058 * </p>
059 *
060 * <pre>
061 * CSVParser parser = CSVFormat.EXCEL.parse(reader);
062 * </pre>
063 *
064 * <p>
065 * The {@link CSVParser} provides static methods to parse other input types, for example:
066 * </p>
067 *
068 * <pre>
069 * CSVParser parser = CSVParser.parse(file, StandardCharsets.US_ASCII, CSVFormat.EXCEL);
070 * </pre>
071 *
072 * <h2>Defining formats</h2>
073 *
074 * <p>
075 * You can extend a format by calling the {@code with} methods. For example:
076 * </p>
077 *
078 * <pre>
079 * CSVFormat.EXCEL.withNullString(&quot;N/A&quot;).withIgnoreSurroundingSpaces(true);
080 * </pre>
081 *
082 * <h2>Defining column names</h2>
083 *
084 * <p>
085 * To define the column names you want to use to access records, write:
086 * </p>
087 *
088 * <pre>
089 * CSVFormat.EXCEL.withHeader(&quot;Col1&quot;, &quot;Col2&quot;, &quot;Col3&quot;);
090 * </pre>
091 *
092 * <p>
 * Calling {@link #withHeader(String...)} lets you use the given names to address values in a {@link CSVRecord}, and
094 * assumes that your CSV source does not contain a first record that also defines column names.
095 *
096 * If it does, then you are overriding this metadata with your names and you should skip the first record by calling
097 * {@link #withSkipHeaderRecord(boolean)} with {@code true}.
098 * </p>
099 *
100 * <h2>Parsing</h2>
101 *
102 * <p>
 * You can use a format directly to parse a reader. For example, to parse an Excel file with a header row, write:
104 * </p>
105 *
106 * <pre>
107 * Reader in = ...;
108 * CSVFormat.EXCEL.withHeader(&quot;Col1&quot;, &quot;Col2&quot;, &quot;Col3&quot;).parse(in);
109 * </pre>
110 *
111 * <p>
112 * For other input types, like resources, files, and URLs, use the static methods on {@link CSVParser}.
113 * </p>
114 *
115 * <h2>Referencing columns safely</h2>
116 *
117 * <p>
118 * If your source contains a header record, you can simplify your code and safely reference columns, by using
119 * {@link #withHeader(String...)} with no arguments:
120 * </p>
121 *
122 * <pre>
123 * CSVFormat.EXCEL.withHeader();
124 * </pre>
125 *
126 * <p>
127 * This causes the parser to read the first record and use its values as column names.
128 *
 * Then, call one of the {@link CSVRecord} get methods that take a String column name argument:
130 * </p>
131 *
132 * <pre>
133 * String value = record.get(&quot;Col1&quot;);
134 * </pre>
135 *
136 * <p>
137 * This makes your code impervious to changes in column order in the CSV file.
138 * </p>
139 *
140 * <h2>Notes</h2>
141 *
142 * <p>
143 * This class is immutable.
144 * </p>
145 *
146 * @version $Id: CSVFormat.java 1638699 2014-11-12 03:49:31Z ggregory $
147 */
148public final class CSVFormat implements Serializable {
149
    /** Serialization version identifier. */
    private static final long serialVersionUID = 1L;

    /** Character separating the values of a record. */
    private final char delimiter;
    /** Character used to quote values; {@code null} if quoting is disabled. */
    private final Character quoteCharacter; // null if quoting is disabled
    /** Quote policy; may be {@code null}. */
    private final QuoteMode quoteMode;
    /** Character starting a comment line; {@code null} if commenting is disabled. */
    private final Character commentMarker; // null if commenting is disabled
    /** Escape character; {@code null} if escaping is disabled. */
    private final Character escapeCharacter; // null if escaping is disabled
    /** Whether leading/trailing spaces around values are ignored. */
    private final boolean ignoreSurroundingSpaces; // Should leading/trailing spaces be ignored around values?
    /** Whether missing column names are allowed when parsing the header line. */
    private final boolean allowMissingColumnNames;
    /** Whether empty lines between records are ignored when parsing input. */
    private final boolean ignoreEmptyLines;
    /** Record separator, used for output. */
    private final String recordSeparator; // for outputs
    /** String converted to and from {@code null}; {@code null} means no substitution. */
    private final String nullString; // the string to be used for null values
    /** Header column names; stored as a private copy of the array handed to the constructor. */
    private final String[] header; // array of header column names
    /** Header comment lines; each element is the {@code toString()} of the object handed to the constructor. */
    private final String[] headerComments; // array of header comment lines
    /** Whether to skip the header record. */
    private final boolean skipHeaderRecord;
165
    /**
     * Standard comma separated format, as for {@link #RFC4180} but allowing empty lines.
     *
     * <p>
     * Settings are:
     * </p>
     * <ul>
     * <li>withDelimiter(',')</li>
     * <li>withQuote('"')</li>
     * <li>withRecordSeparator("\r\n")</li>
     * <li>withIgnoreEmptyLines(true)</li>
     * </ul>
     *
     * <p>
     * Every other setting is {@code null} or {@code false}.
     * </p>
     */
    // Positional arguments, in constructor order: delimiter, quoteChar, quoteMode, commentStart, escape,
    // ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
    // skipHeaderRecord, allowMissingColumnNames.
    public static final CSVFormat DEFAULT = new CSVFormat(COMMA, DOUBLE_QUOTE_CHAR, null, null, null, false, true,
            CRLF, null, null, null, false, false);
181
    /**
     * Comma separated format as defined by <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>.
     *
     * <p>
     * Derived from {@link #DEFAULT} with {@code withIgnoreEmptyLines(false)} applied.
     * </p>
     *
     * <p>
     * Settings are:
     * </p>
     * <ul>
     * <li>withDelimiter(',')</li>
     * <li>withQuote('"')</li>
     * <li>withRecordSeparator("\r\n")</li>
     * <li>withIgnoreEmptyLines(false)</li>
     * </ul>
     */
    public static final CSVFormat RFC4180 = DEFAULT.withIgnoreEmptyLines(false);
196
    /**
     * Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is
     * locale dependent, so it might be necessary to customize this format to accommodate your regional settings.
     *
     * <p>
     * For example, for parsing or generating a CSV file on a French system the following format will be used:
     * </p>
     *
     * <pre>
     * CSVFormat fmt = CSVFormat.EXCEL.withDelimiter(';');
     * </pre>
     *
     * <p>
     * Settings are:
     * </p>
     * <ul>
     * <li>{@link #withDelimiter(char) withDelimiter(',')}</li>
     * <li>{@link #withQuote(char) withQuote('"')}</li>
     * <li>{@link #withRecordSeparator(String) withRecordSeparator("\r\n")}</li>
     * <li>{@link #withIgnoreEmptyLines(boolean) withIgnoreEmptyLines(false)}</li>
     * <li>{@link #withAllowMissingColumnNames(boolean) withAllowMissingColumnNames(true)}</li>
     * </ul>
     * <p>
     * Note: this is currently like {@link #RFC4180} plus {@link #withAllowMissingColumnNames(boolean)
     * withAllowMissingColumnNames(true)}.
     * </p>
     */
    // Built from DEFAULT rather than RFC4180; the first call applies the same change RFC4180 does.
    public static final CSVFormat EXCEL = DEFAULT.withIgnoreEmptyLines(false).withAllowMissingColumnNames();
225
    /**
     * Tab-delimited format.
     *
     * <p>
     * Derived from {@link #DEFAULT} by changing the delimiter and enabling the ignoring of surrounding spaces.
     * </p>
     *
     * <p>
     * Settings are:
     * </p>
     * <ul>
     * <li>withDelimiter('\t')</li>
     * <li>withQuote('"')</li>
     * <li>withRecordSeparator("\r\n")</li>
     * <li>withIgnoreSurroundingSpaces(true)</li>
     * </ul>
     */
    // NOTE(review): DEFAULT ignores empty lines; presumably that setting carries over here as well - confirm.
    public static final CSVFormat TDF = DEFAULT.withDelimiter(TAB).withIgnoreSurroundingSpaces();
240
    /**
     * Default MySQL format used by the {@code SELECT INTO OUTFILE} and {@code LOAD DATA INFILE} operations.
     *
     * <p>
     * This is a tab-delimited format with a LF character as the line separator. Values are not quoted and special
     * characters are escaped with '\'.
     * </p>
     *
     * <p>
     * Settings are:
     * </p>
     * <ul>
     * <li>withDelimiter('\t')</li>
     * <li>withQuote(null)</li>
     * <li>withRecordSeparator('\n')</li>
     * <li>withIgnoreEmptyLines(false)</li>
     * <li>withEscape('\\')</li>
     * </ul>
     *
     * @see <a href="http://dev.mysql.com/doc/refman/5.1/en/load-data.html">
     *      http://dev.mysql.com/doc/refman/5.1/en/load-data.html</a>
     */
    // Derived step by step from DEFAULT; each call in the chain yields a new format.
    public static final CSVFormat MYSQL = DEFAULT.withDelimiter(TAB).withEscape(BACKSLASH).withIgnoreEmptyLines(false)
            .withQuote(null).withRecordSeparator(LF);
265
266    /**
267     * Returns true if the given character is a line break character.
268     *
269     * @param c
270     *            the character to check
271     *
272     * @return true if <code>c</code> is a line break character
273     */
274    private static boolean isLineBreak(final char c) {
275        return c == LF || c == CR;
276    }
277
278    /**
279     * Returns true if the given character is a line break character.
280     *
281     * @param c
282     *            the character to check, may be null
283     *
284     * @return true if <code>c</code> is a line break character (and not null)
285     */
286    private static boolean isLineBreak(final Character c) {
287        return c != null && isLineBreak(c.charValue());
288    }
289
290    /**
291     * Creates a new CSV format with the specified delimiter.
292     *
293     * <p>
294     * Use this method if you want to create a CSVFormat from scratch. All fields but the delimiter will be initialized
295     * with null/false.
296     * </p>
297     *
298     * @param delimiter
299     *            the char used for value separation, must not be a line break character
300     * @return a new CSV format.
301     * @throws IllegalArgumentException
302     *             if the delimiter is a line break character
303     *
304     * @see #DEFAULT
305     * @see #RFC4180
306     * @see #MYSQL
307     * @see #EXCEL
308     * @see #TDF
309     */
310    public static CSVFormat newFormat(final char delimiter) {
311        return new CSVFormat(delimiter, null, null, null, null, false, false, null, null, null, null, false, false);
312    }
313
314    /**
315     * Creates a customized CSV format.
316     *
317     * @param delimiter
318     *            the char used for value separation, must not be a line break character
319     * @param quoteChar
320     *            the Character used as value encapsulation marker, may be {@code null} to disable
321     * @param quoteMode
322     *            the quote mode
323     * @param commentStart
324     *            the Character used for comment identification, may be {@code null} to disable
325     * @param escape
326     *            the Character used to escape special characters in values, may be {@code null} to disable
327     * @param ignoreSurroundingSpaces
328     *            {@code true} when whitespaces enclosing values should be ignored
329     * @param ignoreEmptyLines
330     *            {@code true} when the parser should skip empty lines
331     * @param recordSeparator
332     *            the line separator to use for output
333     * @param nullString
334     *            the line separator to use for output
335     * @param headerComments
336     *            the comments to be printed by the Printer before the actual CSV data
337     * @param header
338     *            the header
339     * @param skipHeaderRecord
340     *            TODO
341     * @param allowMissingColumnNames
342     *            TODO
343     * @throws IllegalArgumentException
344     *             if the delimiter is a line break character
345     */
346    private CSVFormat(final char delimiter, final Character quoteChar, final QuoteMode quoteMode,
347            final Character commentStart, final Character escape, final boolean ignoreSurroundingSpaces,
348            final boolean ignoreEmptyLines, final String recordSeparator, final String nullString,
349            final Object[] headerComments, final String[] header, final boolean skipHeaderRecord,
350            final boolean allowMissingColumnNames) {
351        this.delimiter = delimiter;
352        this.quoteCharacter = quoteChar;
353        this.quoteMode = quoteMode;
354        this.commentMarker = commentStart;
355        this.escapeCharacter = escape;
356        this.ignoreSurroundingSpaces = ignoreSurroundingSpaces;
357        this.allowMissingColumnNames = allowMissingColumnNames;
358        this.ignoreEmptyLines = ignoreEmptyLines;
359        this.recordSeparator = recordSeparator;
360        this.nullString = nullString;
361        this.headerComments = toStringArray(headerComments);
362        this.header = header == null ? null : header.clone();
363        this.skipHeaderRecord = skipHeaderRecord;
364        validate();
365    }
366
367    private String[] toStringArray(final Object[] values) {
368        if (values == null) {
369            return null;
370        }
371        final String[] strings = new String[values.length];
372        for (int i = 0; i < values.length; i++) {
373            final Object value = values[i];
374            strings[i] = value == null ? null : value.toString();
375        }
376        return strings;
377    }
378
379    @Override
380    public boolean equals(final Object obj) {
381        if (this == obj) {
382            return true;
383        }
384        if (obj == null) {
385            return false;
386        }
387        if (getClass() != obj.getClass()) {
388            return false;
389        }
390
391        final CSVFormat other = (CSVFormat) obj;
392        if (delimiter != other.delimiter) {
393            return false;
394        }
395        if (quoteMode != other.quoteMode) {
396            return false;
397        }
398        if (quoteCharacter == null) {
399            if (other.quoteCharacter != null) {
400                return false;
401            }
402        } else if (!quoteCharacter.equals(other.quoteCharacter)) {
403            return false;
404        }
405        if (commentMarker == null) {
406            if (other.commentMarker != null) {
407                return false;
408            }
409        } else if (!commentMarker.equals(other.commentMarker)) {
410            return false;
411        }
412        if (escapeCharacter == null) {
413            if (other.escapeCharacter != null) {
414                return false;
415            }
416        } else if (!escapeCharacter.equals(other.escapeCharacter)) {
417            return false;
418        }
419        if (nullString == null) {
420            if (other.nullString != null) {
421                return false;
422            }
423        } else if (!nullString.equals(other.nullString)) {
424            return false;
425        }
426        if (!Arrays.equals(header, other.header)) {
427            return false;
428        }
429        if (ignoreSurroundingSpaces != other.ignoreSurroundingSpaces) {
430            return false;
431        }
432        if (ignoreEmptyLines != other.ignoreEmptyLines) {
433            return false;
434        }
435        if (skipHeaderRecord != other.skipHeaderRecord) {
436            return false;
437        }
438        if (recordSeparator == null) {
439            if (other.recordSeparator != null) {
440                return false;
441            }
442        } else if (!recordSeparator.equals(other.recordSeparator)) {
443            return false;
444        }
445        return true;
446    }
447
448    /**
449     * Formats the specified values.
450     *
451     * @param values
452     *            the values to format
453     * @return the formatted values
454     */
455    public String format(final Object... values) {
456        final StringWriter out = new StringWriter();
457        try {
458            new CSVPrinter(out, this).printRecord(values);
459            return out.toString().trim();
460        } catch (final IOException e) {
461            // should not happen because a StringWriter does not do IO.
462            throw new IllegalStateException(e);
463        }
464    }
465
466    /**
467     * Returns the character marking the start of a line comment.
468     *
469     * @return the comment start marker, may be {@code null}
470     */
471    public Character getCommentMarker() {
472        return commentMarker;
473    }
474
475    /**
476     * Returns the character delimiting the values (typically ';', ',' or '\t').
477     *
478     * @return the delimiter character
479     */
480    public char getDelimiter() {
481        return delimiter;
482    }
483
484    /**
485     * Returns the escape character.
486     *
487     * @return the escape character, may be {@code null}
488     */
489    public Character getEscapeCharacter() {
490        return escapeCharacter;
491    }
492
493    /**
494     * Returns a copy of the header array.
495     *
496     * @return a copy of the header array; {@code null} if disabled, the empty array if to be read from the file
497     */
498    public String[] getHeader() {
499        return header != null ? header.clone() : null;
500    }
501
502    /**
503     * Returns a copy of the header comment array.
504     *
505     * @return a copy of the header comment array; {@code null} if disabled.
506     */
507    public String[] getHeaderComments() {
508        return headerComments != null ? headerComments.clone() : null;
509    }
510
511    /**
512     * Specifies whether missing column names are allowed when parsing the header line.
513     *
514     * @return {@code true} if missing column names are allowed when parsing the header line, {@code false} to throw an
515     *         {@link IllegalArgumentException}.
516     */
517    public boolean getAllowMissingColumnNames() {
518        return allowMissingColumnNames;
519    }
520
521    /**
522     * Specifies whether empty lines between records are ignored when parsing input.
523     *
524     * @return {@code true} if empty lines between records are ignored, {@code false} if they are turned into empty
525     *         records.
526     */
527    public boolean getIgnoreEmptyLines() {
528        return ignoreEmptyLines;
529    }
530
531    /**
532     * Specifies whether spaces around values are ignored when parsing input.
533     *
534     * @return {@code true} if spaces around values are ignored, {@code false} if they are treated as part of the value.
535     */
536    public boolean getIgnoreSurroundingSpaces() {
537        return ignoreSurroundingSpaces;
538    }
539
540    /**
541     * Gets the String to convert to and from {@code null}.
542     * <ul>
543     * <li>
544     * <strong>Reading:</strong> Converts strings equal to the given {@code nullString} to {@code null} when reading
545     * records.</li>
546     * <li>
547     * <strong>Writing:</strong> Writes {@code null} as the given {@code nullString} when writing records.</li>
548     * </ul>
549     *
550     * @return the String to convert to and from {@code null}. No substitution occurs if {@code null}
551     */
552    public String getNullString() {
553        return nullString;
554    }
555
556    /**
557     * Returns the character used to encapsulate values containing special characters.
558     *
559     * @return the quoteChar character, may be {@code null}
560     */
561    public Character getQuoteCharacter() {
562        return quoteCharacter;
563    }
564
565    /**
566     * Returns the quote policy output fields.
567     *
568     * @return the quote policy
569     */
570    public QuoteMode getQuoteMode() {
571        return quoteMode;
572    }
573
574    /**
575     * Returns the record separator delimiting output records.
576     *
577     * @return the record separator
578     */
579    public String getRecordSeparator() {
580        return recordSeparator;
581    }
582
583    /**
584     * Returns whether to skip the header record.
585     *
586     * @return whether to skip the header record.
587     */
588    public boolean getSkipHeaderRecord() {
589        return skipHeaderRecord;
590    }
591
592    @Override
593    public int hashCode() {
594        final int prime = 31;
595        int result = 1;
596
597        result = prime * result + delimiter;
598        result = prime * result + ((quoteMode == null) ? 0 : quoteMode.hashCode());
599        result = prime * result + ((quoteCharacter == null) ? 0 : quoteCharacter.hashCode());
600        result = prime * result + ((commentMarker == null) ? 0 : commentMarker.hashCode());
601        result = prime * result + ((escapeCharacter == null) ? 0 : escapeCharacter.hashCode());
602        result = prime * result + ((nullString == null) ? 0 : nullString.hashCode());
603        result = prime * result + (ignoreSurroundingSpaces ? 1231 : 1237);
604        result = prime * result + (ignoreEmptyLines ? 1231 : 1237);
605        result = prime * result + (skipHeaderRecord ? 1231 : 1237);
606        result = prime * result + ((recordSeparator == null) ? 0 : recordSeparator.hashCode());
607        result = prime * result + Arrays.hashCode(header);
608        return result;
609    }
610
611    /**
612     * Specifies whether comments are supported by this format.
613     *
614     * Note that the comment introducer character is only recognized at the start of a line.
615     *
616     * @return {@code true} is comments are supported, {@code false} otherwise
617     */
618    public boolean isCommentMarkerSet() {
619        return commentMarker != null;
620    }
621
622    /**
623     * Returns whether escape are being processed.
624     *
625     * @return {@code true} if escapes are processed
626     */
627    public boolean isEscapeCharacterSet() {
628        return escapeCharacter != null;
629    }
630
631    /**
632     * Returns whether a nullString has been defined.
633     *
634     * @return {@code true} if a nullString is defined
635     */
636    public boolean isNullStringSet() {
637        return nullString != null;
638    }
639
640    /**
641     * Returns whether a quoteChar has been defined.
642     *
643     * @return {@code true} if a quoteChar is defined
644     */
645    public boolean isQuoteCharacterSet() {
646        return quoteCharacter != null;
647    }
648
649    /**
650     * Parses the specified content.
651     *
652     * <p>
653     * See also the various static parse methods on {@link CSVParser}.
654     * </p>
655     *
656     * @param in
657     *            the input stream
658     * @return a parser over a stream of {@link CSVRecord}s.
659     * @throws IOException
660     *             If an I/O error occurs
661     */
662    public CSVParser parse(final Reader in) throws IOException {
663        return new CSVParser(in, this);
664    }
665
666    /**
667     * Prints to the specified output.
668     *
669     * <p>
670     * See also {@link CSVPrinter}.
671     * </p>
672     *
673     * @param out
674     *            the output
675     * @return a printer to an output
676     * @throws IOException
677     *             thrown if the optional header cannot be printed.
678     */
679    public CSVPrinter print(final Appendable out) throws IOException {
680        return new CSVPrinter(out, this);
681    }
682
683    @Override
684    public String toString() {
685        final StringBuilder sb = new StringBuilder();
686        sb.append("Delimiter=<").append(delimiter).append('>');
687        if (isEscapeCharacterSet()) {
688            sb.append(' ');
689            sb.append("Escape=<").append(escapeCharacter).append('>');
690        }
691        if (isQuoteCharacterSet()) {
692            sb.append(' ');
693            sb.append("QuoteChar=<").append(quoteCharacter).append('>');
694        }
695        if (isCommentMarkerSet()) {
696            sb.append(' ');
697            sb.append("CommentStart=<").append(commentMarker).append('>');
698        }
699        if (isNullStringSet()) {
700            sb.append(' ');
701            sb.append("NullString=<").append(nullString).append('>');
702        }
703        if (recordSeparator != null) {
704            sb.append(' ');
705            sb.append("RecordSeparator=<").append(recordSeparator).append('>');
706        }
707        if (getIgnoreEmptyLines()) {
708            sb.append(" EmptyLines:ignored");
709        }
710        if (getIgnoreSurroundingSpaces()) {
711            sb.append(" SurroundingSpaces:ignored");
712        }
713        sb.append(" SkipHeaderRecord:").append(skipHeaderRecord);
714        if (headerComments != null) {
715            sb.append(' ');
716            sb.append("HeaderComments:").append(Arrays.toString(headerComments));
717        }
718        if (header != null) {
719            sb.append(' ');
720            sb.append("Header:").append(Arrays.toString(header));
721        }
722        return sb.toString();
723    }
724
725    /**
726     * Verifies the consistency of the parameters and throws an IllegalArgumentException if necessary.
727     *
728     * @throws IllegalArgumentException
729     */
730    private void validate() throws IllegalArgumentException {
731        if (isLineBreak(delimiter)) {
732            throw new IllegalArgumentException("The delimiter cannot be a line break");
733        }
734        
735        if (quoteCharacter != null && delimiter == quoteCharacter.charValue()) {
736            throw new IllegalArgumentException("The quoteChar character and the delimiter cannot be the same ('" +
737                    quoteCharacter + "')");
738        }
739
740        if (escapeCharacter != null && delimiter == escapeCharacter.charValue()) {
741            throw new IllegalArgumentException("The escape character and the delimiter cannot be the same ('" +
742                    escapeCharacter + "')");
743        }
744
745        if (commentMarker != null && delimiter == commentMarker.charValue()) {
746            throw new IllegalArgumentException("The comment start character and the delimiter cannot be the same ('" +
747                    commentMarker + "')");
748        }
749
750        if (quoteCharacter != null && quoteCharacter.equals(commentMarker)) {
751            throw new IllegalArgumentException("The comment start character and the quoteChar cannot be the same ('" +
752                    commentMarker + "')");
753        }
754
755        if (escapeCharacter != null && escapeCharacter.equals(commentMarker)) {
756            throw new IllegalArgumentException("The comment start and the escape character cannot be the same ('" +
757                    commentMarker + "')");
758        }
759
760        if (escapeCharacter == null && quoteMode == QuoteMode.NONE) {
761            throw new IllegalArgumentException("No quotes mode set but no escape character is set");
762        }
763        
764        // validate header
765        if (header != null) {
766            final Set<String> dupCheck = new HashSet<String>();
767            for (final String hdr : header) {
768                if (!dupCheck.add(hdr)) {
769                    throw new IllegalArgumentException("The header contains a duplicate entry: '" + hdr + "' in " +
770                            Arrays.toString(header));
771                }
772            }
773        }
774    }
775
776    /**
777     * Sets the comment start marker of the format to the specified character.
778     *
779     * Note that the comment start character is only recognized at the start of a line.
780     *
781     * @param commentMarker
782     *            the comment start marker
783     * @return A new CSVFormat that is equal to this one but with the specified character as the comment start marker
784     * @throws IllegalArgumentException
785     *             thrown if the specified character is a line break
786     */
787    public CSVFormat withCommentMarker(final char commentMarker) {
788        return withCommentMarker(Character.valueOf(commentMarker));
789    }
790
791    /**
792     * Sets the comment start marker of the format to the specified character.
793     *
794     * Note that the comment start character is only recognized at the start of a line.
795     *
796     * @param commentMarker
797     *            the comment start marker, use {@code null} to disable
798     * @return A new CSVFormat that is equal to this one but with the specified character as the comment start marker
799     * @throws IllegalArgumentException
800     *             thrown if the specified character is a line break
801     */
802    public CSVFormat withCommentMarker(final Character commentMarker) {
803        if (isLineBreak(commentMarker)) {
804            throw new IllegalArgumentException("The comment start marker character cannot be a line break");
805        }
806        return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
807                ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, null, header, skipHeaderRecord,
808                allowMissingColumnNames);
809    }
810
811    /**
812     * Sets the delimiter of the format to the specified character.
813     *
814     * @param delimiter
815     *            the delimiter character
816     * @return A new CSVFormat that is equal to this with the specified character as delimiter
817     * @throws IllegalArgumentException
818     *             thrown if the specified character is a line break
819     */
820    public CSVFormat withDelimiter(final char delimiter) {
821        if (isLineBreak(delimiter)) {
822            throw new IllegalArgumentException("The delimiter cannot be a line break");
823        }
824        return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
825                ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, null, header, skipHeaderRecord,
826                allowMissingColumnNames);
827    }
828
829    /**
830     * Sets the escape character of the format to the specified character.
831     *
832     * @param escape
833     *            the escape character
834     * @return A new CSVFormat that is equal to his but with the specified character as the escape character
835     * @throws IllegalArgumentException
836     *             thrown if the specified character is a line break
837     */
838    public CSVFormat withEscape(final char escape) {
839        return withEscape(Character.valueOf(escape));
840    }
841
842    /**
843     * Sets the escape character of the format to the specified character.
844     *
845     * @param escape
846     *            the escape character, use {@code null} to disable
847     * @return A new CSVFormat that is equal to this but with the specified character as the escape character
848     * @throws IllegalArgumentException
849     *             thrown if the specified character is a line break
850     */
851    public CSVFormat withEscape(final Character escape) {
852        if (isLineBreak(escape)) {
853            throw new IllegalArgumentException("The escape character cannot be a line break");
854        }
855        return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escape, ignoreSurroundingSpaces,
856                ignoreEmptyLines, recordSeparator, nullString, null, header, skipHeaderRecord, allowMissingColumnNames);
857    }
858
859    /**
860     * Sets the header of the format. The header can either be parsed automatically from the input file with:
861     *
862     * <pre>
863     * CSVFormat format = aformat.withHeader();
864     * </pre>
865     *
866     * or specified manually with:
867     *
868     * <pre>
869     * CSVFormat format = aformat.withHeader(&quot;name&quot;, &quot;email&quot;, &quot;phone&quot;);
870     * </pre>
871     * <p>
872     * The header is also used by the {@link CSVPrinter}..
873     * </p>
874     *
875     * @param header
876     *            the header, {@code null} if disabled, empty if parsed automatically, user specified otherwise.
877     *
878     * @return A new CSVFormat that is equal to this but with the specified header
879     * @see #withSkipHeaderRecord(boolean)
880     */
881    public CSVFormat withHeader(final String... header) {
882        return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
883                ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, null, header, skipHeaderRecord,
884                allowMissingColumnNames);
885    }
886
887    /**
888     * Sets the header of the format. The header can either be parsed automatically from the input file with:
889     *
890     * <pre>
891     * CSVFormat format = aformat.withHeader();
892     * </pre>
893     *
894     * or specified manually with:
895     *
896     * <pre>
897     * CSVFormat format = aformat.withHeader(resultSet);
898     * </pre>
899     * <p>
900     * The header is also used by the {@link CSVPrinter}..
901     * </p>
902     *
903     * @param resultSet
904     *            the resultSet for the header, {@code null} if disabled, empty if parsed automatically, user specified
905     *            otherwise.
906     *
907     * @return A new CSVFormat that is equal to this but with the specified header
908     * @throws SQLException
909     *             SQLException if a database access error occurs or this method is called on a closed result set.
910     * @since 1.1
911     */
912    public CSVFormat withHeader(final ResultSet resultSet) throws SQLException {
913        return withHeader(resultSet != null ? resultSet.getMetaData() : null);
914    }
915
916    /**
917     * Sets the header of the format. The header can either be parsed automatically from the input file with:
918     *
919     * <pre>
920     * CSVFormat format = aformat.withHeader();
921     * </pre>
922     *
923     * or specified manually with:
924     *
925     * <pre>
926     * CSVFormat format = aformat.withHeader(metaData);
927     * </pre>
928     * <p>
929     * The header is also used by the {@link CSVPrinter}..
930     * </p>
931     *
932     * @param metaData
933     *            the metaData for the header, {@code null} if disabled, empty if parsed automatically, user specified
934     *            otherwise.
935     *
936     * @return A new CSVFormat that is equal to this but with the specified header
937     * @throws SQLException
938     *             SQLException if a database access error occurs or this method is called on a closed result set.
939     * @since 1.1
940     */
941    public CSVFormat withHeader(final ResultSetMetaData metaData) throws SQLException {
942        String[] labels = null;
943        if (metaData != null) {
944            final int columnCount = metaData.getColumnCount();
945            labels = new String[columnCount];
946            for (int i = 0; i < columnCount; i++) {
947                labels[i] = metaData.getColumnLabel(i + 1);
948            }
949        }
950        return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
951                ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, null, labels, skipHeaderRecord,
952                allowMissingColumnNames);
953    }
954
955    /**
956     * Sets the header comments of the format. The comments will be printed first, before the headers. This setting is
957     * ignored by the parser.
958     *
959     * <pre>
960     * CSVFormat format = aformat.withHeaderComments(&quot;Generated by Apache Commons CSV 1.1.&quot;, new Date());
961     * </pre>
962     *
963     * @param headerComments
964     *            the headerComments which will be printed by the Printer before the actual CSV data.
965     *
966     * @return A new CSVFormat that is equal to this but with the specified header
967     * @see #withSkipHeaderRecord(boolean)
968     * @since 1.1
969     */
970    public CSVFormat withHeaderComments(final Object... headerComments) {
971        return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
972                ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
973                skipHeaderRecord, allowMissingColumnNames);
974    }
975
976    /**
977     * Sets the missing column names behavior of the format to {@code true}
978     *
979     * @return A new CSVFormat that is equal to this but with the specified missing column names behavior.
980     * @see #withAllowMissingColumnNames(boolean)
981     * @since 1.1
982     */
983    public CSVFormat withAllowMissingColumnNames() {
984        return this.withAllowMissingColumnNames(true);
985    }
986
987    /**
988     * Sets the missing column names behavior of the format.
989     *
990     * @param allowMissingColumnNames
991     *            the missing column names behavior, {@code true} to allow missing column names in the header line,
992     *            {@code false} to cause an {@link IllegalArgumentException} to be thrown.
993     * @return A new CSVFormat that is equal to this but with the specified missing column names behavior.
994     */
995    public CSVFormat withAllowMissingColumnNames(final boolean allowMissingColumnNames) {
996        return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
997                ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, null, header, skipHeaderRecord,
998                allowMissingColumnNames);
999    }
1000
1001    /**
1002     * Sets the empty line skipping behavior of the format to {@code true}.
1003     *
1004     * @return A new CSVFormat that is equal to this but with the specified empty line skipping behavior.
1005     * @since {@link #withIgnoreEmptyLines(boolean)}
1006     * @since 1.1
1007     */
1008    public CSVFormat withIgnoreEmptyLines() {
1009        return this.withIgnoreEmptyLines(true);
1010    }
1011
1012    /**
1013     * Sets the empty line skipping behavior of the format.
1014     *
1015     * @param ignoreEmptyLines
1016     *            the empty line skipping behavior, {@code true} to ignore the empty lines between the records,
1017     *            {@code false} to translate empty lines to empty records.
1018     * @return A new CSVFormat that is equal to this but with the specified empty line skipping behavior.
1019     */
1020    public CSVFormat withIgnoreEmptyLines(final boolean ignoreEmptyLines) {
1021        return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
1022                ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, null, header, skipHeaderRecord,
1023                allowMissingColumnNames);
1024    }
1025
1026    /**
1027     * Sets the trimming behavior of the format to {@code true}.
1028     *
1029     * @return A new CSVFormat that is equal to this but with the specified trimming behavior.
1030     * @see #withIgnoreSurroundingSpaces(boolean)
1031     * @since 1.1
1032     */
1033    public CSVFormat withIgnoreSurroundingSpaces() {
1034        return this.withIgnoreSurroundingSpaces(true);
1035    }
1036
1037    /**
1038     * Sets the trimming behavior of the format.
1039     *
1040     * @param ignoreSurroundingSpaces
1041     *            the trimming behavior, {@code true} to remove the surrounding spaces, {@code false} to leave the
1042     *            spaces as is.
1043     * @return A new CSVFormat that is equal to this but with the specified trimming behavior.
1044     */
1045    public CSVFormat withIgnoreSurroundingSpaces(final boolean ignoreSurroundingSpaces) {
1046        return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
1047                ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, null, header, skipHeaderRecord,
1048                allowMissingColumnNames);
1049    }
1050
1051    /**
1052     * Performs conversions to and from null for strings on input and output.
1053     * <ul>
1054     * <li>
1055     * <strong>Reading:</strong> Converts strings equal to the given {@code nullString} to {@code null} when reading
1056     * records.</li>
1057     * <li>
1058     * <strong>Writing:</strong> Writes {@code null} as the given {@code nullString} when writing records.</li>
1059     * </ul>
1060     *
1061     * @param nullString
1062     *            the String to convert to and from {@code null}. No substitution occurs if {@code null}
1063     *
1064     * @return A new CSVFormat that is equal to this but with the specified null conversion string.
1065     */
1066    public CSVFormat withNullString(final String nullString) {
1067        return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
1068                ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, null, header, skipHeaderRecord,
1069                allowMissingColumnNames);
1070    }
1071
1072    /**
1073     * Sets the quoteChar of the format to the specified character.
1074     *
1075     * @param quoteChar
1076     *            the quoteChar character
1077     * @return A new CSVFormat that is equal to this but with the specified character as quoteChar
1078     * @throws IllegalArgumentException
1079     *             thrown if the specified character is a line break
1080     */
1081    public CSVFormat withQuote(final char quoteChar) {
1082        return withQuote(Character.valueOf(quoteChar));
1083    }
1084
1085    /**
1086     * Sets the quoteChar of the format to the specified character.
1087     *
1088     * @param quoteChar
1089     *            the quoteChar character, use {@code null} to disable
1090     * @return A new CSVFormat that is equal to this but with the specified character as quoteChar
1091     * @throws IllegalArgumentException
1092     *             thrown if the specified character is a line break
1093     */
1094    public CSVFormat withQuote(final Character quoteChar) {
1095        if (isLineBreak(quoteChar)) {
1096            throw new IllegalArgumentException("The quoteChar cannot be a line break");
1097        }
1098        return new CSVFormat(delimiter, quoteChar, quoteMode, commentMarker, escapeCharacter, ignoreSurroundingSpaces,
1099                ignoreEmptyLines, recordSeparator, nullString, null, header, skipHeaderRecord, allowMissingColumnNames);
1100    }
1101
1102    /**
1103     * Sets the output quote policy of the format to the specified value.
1104     *
1105     * @param quoteModePolicy
1106     *            the quote policy to use for output.
1107     *
1108     * @return A new CSVFormat that is equal to this but with the specified quote policy
1109     */
1110    public CSVFormat withQuoteMode(final QuoteMode quoteModePolicy) {
1111        return new CSVFormat(delimiter, quoteCharacter, quoteModePolicy, commentMarker, escapeCharacter,
1112                ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, null, header, skipHeaderRecord,
1113                allowMissingColumnNames);
1114    }
1115
1116    /**
1117     * Sets the record separator of the format to the specified character.
1118     *
1119     * <p>
1120     * <strong>Note:</strong> This setting is only used during printing and does not affect parsing. Parsing currently
1121     * only works for inputs with '\n', '\r' and "\r\n"
1122     * </p>
1123     *
1124     * @param recordSeparator
1125     *            the record separator to use for output.
1126     *
1127     * @return A new CSVFormat that is equal to this but with the the specified output record separator
1128     */
1129    public CSVFormat withRecordSeparator(final char recordSeparator) {
1130        return withRecordSeparator(String.valueOf(recordSeparator));
1131    }
1132
1133    /**
1134     * Sets the record separator of the format to the specified String.
1135     *
1136     * <p>
1137     * <strong>Note:</strong> This setting is only used during printing and does not affect parsing. Parsing currently
1138     * only works for inputs with '\n', '\r' and "\r\n"
1139     * </p>
1140     *
1141     * @param recordSeparator
1142     *            the record separator to use for output.
1143     *
1144     * @return A new CSVFormat that is equal to this but with the the specified output record separator
1145     * @throws IllegalArgumentException
1146     *             if recordSeparator is none of CR, LF or CRLF
1147     */
1148    public CSVFormat withRecordSeparator(final String recordSeparator) {
1149        return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
1150                ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, null, header, skipHeaderRecord,
1151                allowMissingColumnNames);
1152    }
1153
1154    /**
1155     * Sets skipping the header record to {@code true}.
1156     *
1157     * @return A new CSVFormat that is equal to this but with the the specified skipHeaderRecord setting.
1158     * @see #withSkipHeaderRecord(boolean)
1159     * @see #withHeader(String...)
1160     * @since 1.1
1161     */
1162    public CSVFormat withSkipHeaderRecord() {
1163        return this.withSkipHeaderRecord(true);
1164    }
1165
1166    /**
1167     * Sets whether to skip the header record.
1168     *
1169     * @param skipHeaderRecord
1170     *            whether to skip the header record.
1171     *
1172     * @return A new CSVFormat that is equal to this but with the the specified skipHeaderRecord setting.
1173     * @see #withHeader(String...)
1174     */
1175    public CSVFormat withSkipHeaderRecord(final boolean skipHeaderRecord) {
1176        return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
1177                ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, null, header, skipHeaderRecord,
1178                allowMissingColumnNames);
1179    }
1180}