001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.csv;
019
020import static org.apache.commons.csv.Constants.BACKSLASH;
021import static org.apache.commons.csv.Constants.COMMA;
022import static org.apache.commons.csv.Constants.CR;
023import static org.apache.commons.csv.Constants.CRLF;
024import static org.apache.commons.csv.Constants.DOUBLE_QUOTE_CHAR;
025import static org.apache.commons.csv.Constants.LF;
026import static org.apache.commons.csv.Constants.TAB;
027
028import java.io.IOException;
029import java.io.Reader;
030import java.io.Serializable;
031import java.io.StringWriter;
032import java.sql.ResultSet;
033import java.sql.ResultSetMetaData;
034import java.sql.SQLException;
035import java.util.Arrays;
036import java.util.HashSet;
037import java.util.Set;
038
039/**
040 * Specifies the format of a CSV file and parses input.
041 *
042 * <h2>Using predefined formats</h2>
043 *
044 * <p>
045 * You can use one of the predefined formats:
046 * </p>
047 *
048 * <ul>
049 * <li>{@link #DEFAULT}</li>
050 * <li>{@link #EXCEL}</li>
051 * <li>{@link #MYSQL}</li>
052 * <li>{@link #RFC4180}</li>
053 * <li>{@link #TDF}</li>
054 * </ul>
055 *
056 * <p>
057 * For example:
058 * </p>
059 *
060 * <pre>
061 * CSVParser parser = CSVFormat.EXCEL.parse(reader);
062 * </pre>
063 *
064 * <p>
065 * The {@link CSVParser} provides static methods to parse other input types, for example:
066 * </p>
067 *
068 * <pre>
069 * CSVParser parser = CSVParser.parse(file, StandardCharsets.US_ASCII, CSVFormat.EXCEL);
070 * </pre>
071 *
072 * <h2>Defining formats</h2>
073 *
074 * <p>
075 * You can extend a format by calling the {@code with} methods. For example:
076 * </p>
077 *
078 * <pre>
079 * CSVFormat.EXCEL.withNullString(&quot;N/A&quot;).withIgnoreSurroundingSpaces(true);
080 * </pre>
081 *
082 * <h2>Defining column names</h2>
083 *
084 * <p>
085 * To define the column names you want to use to access records, write:
086 * </p>
087 *
088 * <pre>
089 * CSVFormat.EXCEL.withHeader(&quot;Col1&quot;, &quot;Col2&quot;, &quot;Col3&quot;);
090 * </pre>
091 *
092 * <p>
093 * Calling {@link #withHeader(String...)} let's you use the given names to address values in a {@link CSVRecord}, and
094 * assumes that your CSV source does not contain a first record that also defines column names.
095 *
096 * If it does, then you are overriding this metadata with your names and you should skip the first record by calling
097 * {@link #withSkipHeaderRecord(boolean)} with {@code true}.
098 * </p>
099 *
100 * <h2>Parsing</h2>
101 *
102 * <p>
103 * You can use a format directly to parse a reader. For example, to parse an Excel file with columns header, write:
104 * </p>
105 *
106 * <pre>
107 * Reader in = ...;
108 * CSVFormat.EXCEL.withHeader(&quot;Col1&quot;, &quot;Col2&quot;, &quot;Col3&quot;).parse(in);
109 * </pre>
110 *
111 * <p>
112 * For other input types, like resources, files, and URLs, use the static methods on {@link CSVParser}.
113 * </p>
114 *
115 * <h2>Referencing columns safely</h2>
116 *
117 * <p>
118 * If your source contains a header record, you can simplify your code and safely reference columns, by using
119 * {@link #withHeader(String...)} with no arguments:
120 * </p>
121 *
122 * <pre>
123 * CSVFormat.EXCEL.withHeader();
124 * </pre>
125 *
126 * <p>
127 * This causes the parser to read the first record and use its values as column names.
128 *
129 * Then, call one of the {@link CSVRecord} get method that takes a String column name argument:
130 * </p>
131 *
132 * <pre>
133 * String value = record.get(&quot;Col1&quot;);
134 * </pre>
135 *
136 * <p>
137 * This makes your code impervious to changes in column order in the CSV file.
138 * </p>
139 *
140 * <h2>Notes</h2>
141 *
142 * <p>
143 * This class is immutable.
144 * </p>
145 *
146 * @version $Id: CSVFormat.java 1695190 2015-08-11 02:14:33Z ggregory $
147 */
148public final class CSVFormat implements Serializable {
149
150    /**
151     * Predefines formats.
152     * 
153     * @since 1.2
154     */
155    public static enum Predefined {
156
157        /**
158         * @see CSVFormat#DEFAULT
159         */
160        Default(CSVFormat.DEFAULT), 
161
162        /**
163         * @see CSVFormat#EXCEL
164         */
165        Excel(CSVFormat.EXCEL), 
166
167        /**
168         * @see CSVFormat#MYSQL
169         */
170        MySQL(CSVFormat.MYSQL), 
171
172        /**
173         * @see CSVFormat#RFC4180
174         */
175        RFC4180(CSVFormat.RFC4180),
176
177        /**
178         * @see CSVFormat#TDF
179         */
180        TDF(CSVFormat.TDF);
181
182        private final CSVFormat format;
183
184        private Predefined(CSVFormat format) {
185            this.format = format;
186        }
187        
188        /**
189         * Gets the format.
190         * 
191         * @return the format.
192         */
193        public CSVFormat getFormat() {
194            return format;
195        }
196    };
197    
198    private static final long serialVersionUID = 1L;
199
200    private final char delimiter;
201    private final Character quoteCharacter; // null if quoting is disabled
202    private final QuoteMode quoteMode;
203    private final Character commentMarker; // null if commenting is disabled
204    private final Character escapeCharacter; // null if escaping is disabled
205    private final boolean ignoreSurroundingSpaces; // Should leading/trailing spaces be ignored around values?
206    private final boolean allowMissingColumnNames;
207    private final boolean ignoreEmptyLines;
208    private final String recordSeparator; // for outputs
209    private final String nullString; // the string to be used for null values
210    private final String[] header; // array of header column names
211    private final String[] headerComments; // array of header comment lines
212    private final boolean skipHeaderRecord;
213
214    /**
215     * Standard comma separated format, as for {@link #RFC4180} but allowing empty lines.
216     *
217     * <p>
218     * Settings are:
219     * </p>
220     * <ul>
221     * <li>withDelimiter(',')</li>
222     * <li>withQuote('"')</li>
223     * <li>withRecordSeparator("\r\n")</li>
224     * <li>withIgnoreEmptyLines(true)</li>
225     * </ul>
226     * @see Predefined#Default
227     */
228    public static final CSVFormat DEFAULT = new CSVFormat(COMMA, DOUBLE_QUOTE_CHAR, null, null, null, false, true,
229            CRLF, null, null, null, false, false);
230
231    /**
232     * Comma separated format as defined by <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>.
233     *
234     * <p>
235     * Settings are:
236     * </p>
237     * <ul>
238     * <li>withDelimiter(',')</li>
239     * <li>withQuote('"')</li>
240     * <li>withRecordSeparator("\r\n")</li>
241     * <li>withIgnoreEmptyLines(false)</li>
242     * </ul>
243     * @see Predefined#RFC4180
244     */
245    public static final CSVFormat RFC4180 = DEFAULT.withIgnoreEmptyLines(false);
246
247    /**
248     * Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is
249     * locale dependent, it might be necessary to customize this format to accommodate to your regional settings.
250     *
251     * <p>
252     * For example for parsing or generating a CSV file on a French system the following format will be used:
253     * </p>
254     *
255     * <pre>
256     * CSVFormat fmt = CSVFormat.EXCEL.withDelimiter(';');
257     * </pre>
258     *
259     * <p>
260     * Settings are:
261     * </p>
262     * <ul>
263     * <li>{@link #withDelimiter(char) withDelimiter(',')}</li>
264     * <li>{@link #withQuote(char) withQuote('"')}</li>
265     * <li>{@link #withRecordSeparator(String) withRecordSeparator("\r\n")}</li>
266     * <li>{@link #withIgnoreEmptyLines(boolean) withIgnoreEmptyLines(false)}</li>
267     * <li>{@link #withAllowMissingColumnNames(boolean) withAllowMissingColumnNames(true)}</li>
268     * </ul>
269     * <p>
270     * Note: this is currently like {@link #RFC4180} plus {@link #withAllowMissingColumnNames(boolean)
271     * withAllowMissingColumnNames(true)}.
272     * </p>
273     * @see Predefined#Excel
274     */
275    public static final CSVFormat EXCEL = DEFAULT.withIgnoreEmptyLines(false).withAllowMissingColumnNames();
276
277    /**
278     * Tab-delimited format.
279     *
280     * <p>
281     * Settings are:
282     * </p>
283     * <ul>
284     * <li>withDelimiter('\t')</li>
285     * <li>withQuote('"')</li>
286     * <li>withRecordSeparator("\r\n")</li>
287     * <li>withIgnoreSurroundingSpaces(true)</li>
288     * </ul>
289     * @see Predefined#TDF
290     */
291    public static final CSVFormat TDF = DEFAULT.withDelimiter(TAB).withIgnoreSurroundingSpaces();
292
293    /**
294     * Default MySQL format used by the {@code SELECT INTO OUTFILE} and {@code LOAD DATA INFILE} operations.
295     *
296     * <p>
297     * This is a tab-delimited format with a LF character as the line separator. Values are not quoted and special
298     * characters are escaped with '\'.
299     * </p>
300     *
301     * <p>
302     * Settings are:
303     * </p>
304     * <ul>
305     * <li>withDelimiter('\t')</li>
306     * <li>withQuote(null)</li>
307     * <li>withRecordSeparator('\n')</li>
308     * <li>withIgnoreEmptyLines(false)</li>
309     * <li>withEscape('\\')</li>
310     * </ul>
311     *
312     * @see Predefined#MySQL
313     * @see <a href="http://dev.mysql.com/doc/refman/5.1/en/load-data.html">
314     *      http://dev.mysql.com/doc/refman/5.1/en/load-data.html</a>
315     */
316    public static final CSVFormat MYSQL = DEFAULT.withDelimiter(TAB).withEscape(BACKSLASH).withIgnoreEmptyLines(false)
317            .withQuote(null).withRecordSeparator(LF);
318
319    /**
320     * Returns true if the given character is a line break character.
321     *
322     * @param c
323     *            the character to check
324     *
325     * @return true if <code>c</code> is a line break character
326     */
327    private static boolean isLineBreak(final char c) {
328        return c == LF || c == CR;
329    }
330
331    /**
332     * Returns true if the given character is a line break character.
333     *
334     * @param c
335     *            the character to check, may be null
336     *
337     * @return true if <code>c</code> is a line break character (and not null)
338     */
339    private static boolean isLineBreak(final Character c) {
340        return c != null && isLineBreak(c.charValue());
341    }
342
343    /**
344     * Creates a new CSV format with the specified delimiter.
345     *
346     * <p>
347     * Use this method if you want to create a CSVFormat from scratch. All fields but the delimiter will be initialized
348     * with null/false.
349     * </p>
350     *
351     * @param delimiter
352     *            the char used for value separation, must not be a line break character
353     * @return a new CSV format.
354     * @throws IllegalArgumentException
355     *             if the delimiter is a line break character
356     *
357     * @see #DEFAULT
358     * @see #RFC4180
359     * @see #MYSQL
360     * @see #EXCEL
361     * @see #TDF
362     */
363    public static CSVFormat newFormat(final char delimiter) {
364        return new CSVFormat(delimiter, null, null, null, null, false, false, null, null, null, null, false, false);
365    }
366
367    /**
368     * Gets one of the predefined formats from {@link CSVFormat.Predefined}.
369     * 
370     * @param format
371     *            name
372     * @return one of the predefined formats
373     * @since 1.2
374     */
375    public static CSVFormat valueOf(final String format) {
376        return CSVFormat.Predefined.valueOf(format).getFormat();
377    }
378
379    /**
380     * Creates a customized CSV format.
381     *
382     * @param delimiter
383     *            the char used for value separation, must not be a line break character
384     * @param quoteChar
385     *            the Character used as value encapsulation marker, may be {@code null} to disable
386     * @param quoteMode
387     *            the quote mode
388     * @param commentStart
389     *            the Character used for comment identification, may be {@code null} to disable
390     * @param escape
391     *            the Character used to escape special characters in values, may be {@code null} to disable
392     * @param ignoreSurroundingSpaces
393     *            {@code true} when whitespaces enclosing values should be ignored
394     * @param ignoreEmptyLines
395     *            {@code true} when the parser should skip empty lines
396     * @param recordSeparator
397     *            the line separator to use for output
398     * @param nullString
399     *            the line separator to use for output
400     * @param headerComments
401     *            the comments to be printed by the Printer before the actual CSV data
402     * @param header
403     *            the header
404     * @param skipHeaderRecord
405     *            TODO
406     * @param allowMissingColumnNames
407     *            TODO
408     * @throws IllegalArgumentException
409     *             if the delimiter is a line break character
410     */
411    private CSVFormat(final char delimiter, final Character quoteChar, final QuoteMode quoteMode,
412            final Character commentStart, final Character escape, final boolean ignoreSurroundingSpaces,
413            final boolean ignoreEmptyLines, final String recordSeparator, final String nullString,
414            final Object[] headerComments, final String[] header, final boolean skipHeaderRecord,
415            final boolean allowMissingColumnNames) {
416        this.delimiter = delimiter;
417        this.quoteCharacter = quoteChar;
418        this.quoteMode = quoteMode;
419        this.commentMarker = commentStart;
420        this.escapeCharacter = escape;
421        this.ignoreSurroundingSpaces = ignoreSurroundingSpaces;
422        this.allowMissingColumnNames = allowMissingColumnNames;
423        this.ignoreEmptyLines = ignoreEmptyLines;
424        this.recordSeparator = recordSeparator;
425        this.nullString = nullString;
426        this.headerComments = toStringArray(headerComments);
427        this.header = header == null ? null : header.clone();
428        this.skipHeaderRecord = skipHeaderRecord;
429        validate();
430    }
431
432    private String[] toStringArray(final Object[] values) {
433        if (values == null) {
434            return null;
435        }
436        final String[] strings = new String[values.length];
437        for (int i = 0; i < values.length; i++) {
438            final Object value = values[i];
439            strings[i] = value == null ? null : value.toString();
440        }
441        return strings;
442    }
443
444    @Override
445    public boolean equals(final Object obj) {
446        if (this == obj) {
447            return true;
448        }
449        if (obj == null) {
450            return false;
451        }
452        if (getClass() != obj.getClass()) {
453            return false;
454        }
455
456        final CSVFormat other = (CSVFormat) obj;
457        if (delimiter != other.delimiter) {
458            return false;
459        }
460        if (quoteMode != other.quoteMode) {
461            return false;
462        }
463        if (quoteCharacter == null) {
464            if (other.quoteCharacter != null) {
465                return false;
466            }
467        } else if (!quoteCharacter.equals(other.quoteCharacter)) {
468            return false;
469        }
470        if (commentMarker == null) {
471            if (other.commentMarker != null) {
472                return false;
473            }
474        } else if (!commentMarker.equals(other.commentMarker)) {
475            return false;
476        }
477        if (escapeCharacter == null) {
478            if (other.escapeCharacter != null) {
479                return false;
480            }
481        } else if (!escapeCharacter.equals(other.escapeCharacter)) {
482            return false;
483        }
484        if (nullString == null) {
485            if (other.nullString != null) {
486                return false;
487            }
488        } else if (!nullString.equals(other.nullString)) {
489            return false;
490        }
491        if (!Arrays.equals(header, other.header)) {
492            return false;
493        }
494        if (ignoreSurroundingSpaces != other.ignoreSurroundingSpaces) {
495            return false;
496        }
497        if (ignoreEmptyLines != other.ignoreEmptyLines) {
498            return false;
499        }
500        if (skipHeaderRecord != other.skipHeaderRecord) {
501            return false;
502        }
503        if (recordSeparator == null) {
504            if (other.recordSeparator != null) {
505                return false;
506            }
507        } else if (!recordSeparator.equals(other.recordSeparator)) {
508            return false;
509        }
510        return true;
511    }
512
513    /**
514     * Formats the specified values.
515     *
516     * @param values
517     *            the values to format
518     * @return the formatted values
519     */
520    public String format(final Object... values) {
521        final StringWriter out = new StringWriter();
522        try {
523            new CSVPrinter(out, this).printRecord(values);
524            return out.toString().trim();
525        } catch (final IOException e) {
526            // should not happen because a StringWriter does not do IO.
527            throw new IllegalStateException(e);
528        }
529    }
530
531    /**
532     * Returns the character marking the start of a line comment.
533     *
534     * @return the comment start marker, may be {@code null}
535     */
536    public Character getCommentMarker() {
537        return commentMarker;
538    }
539
540    /**
541     * Returns the character delimiting the values (typically ';', ',' or '\t').
542     *
543     * @return the delimiter character
544     */
545    public char getDelimiter() {
546        return delimiter;
547    }
548
549    /**
550     * Returns the escape character.
551     *
552     * @return the escape character, may be {@code null}
553     */
554    public Character getEscapeCharacter() {
555        return escapeCharacter;
556    }
557
558    /**
559     * Returns a copy of the header array.
560     *
561     * @return a copy of the header array; {@code null} if disabled, the empty array if to be read from the file
562     */
563    public String[] getHeader() {
564        return header != null ? header.clone() : null;
565    }
566
567    /**
568     * Returns a copy of the header comment array.
569     *
570     * @return a copy of the header comment array; {@code null} if disabled.
571     */
572    public String[] getHeaderComments() {
573        return headerComments != null ? headerComments.clone() : null;
574    }
575
576    /**
577     * Specifies whether missing column names are allowed when parsing the header line.
578     *
579     * @return {@code true} if missing column names are allowed when parsing the header line, {@code false} to throw an
580     *         {@link IllegalArgumentException}.
581     */
582    public boolean getAllowMissingColumnNames() {
583        return allowMissingColumnNames;
584    }
585
586    /**
587     * Specifies whether empty lines between records are ignored when parsing input.
588     *
589     * @return {@code true} if empty lines between records are ignored, {@code false} if they are turned into empty
590     *         records.
591     */
592    public boolean getIgnoreEmptyLines() {
593        return ignoreEmptyLines;
594    }
595
596    /**
597     * Specifies whether spaces around values are ignored when parsing input.
598     *
599     * @return {@code true} if spaces around values are ignored, {@code false} if they are treated as part of the value.
600     */
601    public boolean getIgnoreSurroundingSpaces() {
602        return ignoreSurroundingSpaces;
603    }
604
605    /**
606     * Gets the String to convert to and from {@code null}.
607     * <ul>
608     * <li>
609     * <strong>Reading:</strong> Converts strings equal to the given {@code nullString} to {@code null} when reading
610     * records.</li>
611     * <li>
612     * <strong>Writing:</strong> Writes {@code null} as the given {@code nullString} when writing records.</li>
613     * </ul>
614     *
615     * @return the String to convert to and from {@code null}. No substitution occurs if {@code null}
616     */
617    public String getNullString() {
618        return nullString;
619    }
620
621    /**
622     * Returns the character used to encapsulate values containing special characters.
623     *
624     * @return the quoteChar character, may be {@code null}
625     */
626    public Character getQuoteCharacter() {
627        return quoteCharacter;
628    }
629
630    /**
631     * Returns the quote policy output fields.
632     *
633     * @return the quote policy
634     */
635    public QuoteMode getQuoteMode() {
636        return quoteMode;
637    }
638
639    /**
640     * Returns the record separator delimiting output records.
641     *
642     * @return the record separator
643     */
644    public String getRecordSeparator() {
645        return recordSeparator;
646    }
647
648    /**
649     * Returns whether to skip the header record.
650     *
651     * @return whether to skip the header record.
652     */
653    public boolean getSkipHeaderRecord() {
654        return skipHeaderRecord;
655    }
656
657    @Override
658    public int hashCode() {
659        final int prime = 31;
660        int result = 1;
661
662        result = prime * result + delimiter;
663        result = prime * result + ((quoteMode == null) ? 0 : quoteMode.hashCode());
664        result = prime * result + ((quoteCharacter == null) ? 0 : quoteCharacter.hashCode());
665        result = prime * result + ((commentMarker == null) ? 0 : commentMarker.hashCode());
666        result = prime * result + ((escapeCharacter == null) ? 0 : escapeCharacter.hashCode());
667        result = prime * result + ((nullString == null) ? 0 : nullString.hashCode());
668        result = prime * result + (ignoreSurroundingSpaces ? 1231 : 1237);
669        result = prime * result + (ignoreEmptyLines ? 1231 : 1237);
670        result = prime * result + (skipHeaderRecord ? 1231 : 1237);
671        result = prime * result + ((recordSeparator == null) ? 0 : recordSeparator.hashCode());
672        result = prime * result + Arrays.hashCode(header);
673        return result;
674    }
675
676    /**
677     * Specifies whether comments are supported by this format.
678     *
679     * Note that the comment introducer character is only recognized at the start of a line.
680     *
681     * @return {@code true} is comments are supported, {@code false} otherwise
682     */
683    public boolean isCommentMarkerSet() {
684        return commentMarker != null;
685    }
686
687    /**
688     * Returns whether escape are being processed.
689     *
690     * @return {@code true} if escapes are processed
691     */
692    public boolean isEscapeCharacterSet() {
693        return escapeCharacter != null;
694    }
695
696    /**
697     * Returns whether a nullString has been defined.
698     *
699     * @return {@code true} if a nullString is defined
700     */
701    public boolean isNullStringSet() {
702        return nullString != null;
703    }
704
705    /**
706     * Returns whether a quoteChar has been defined.
707     *
708     * @return {@code true} if a quoteChar is defined
709     */
710    public boolean isQuoteCharacterSet() {
711        return quoteCharacter != null;
712    }
713
714    /**
715     * Parses the specified content.
716     *
717     * <p>
718     * See also the various static parse methods on {@link CSVParser}.
719     * </p>
720     *
721     * @param in
722     *            the input stream
723     * @return a parser over a stream of {@link CSVRecord}s.
724     * @throws IOException
725     *             If an I/O error occurs
726     */
727    public CSVParser parse(final Reader in) throws IOException {
728        return new CSVParser(in, this);
729    }
730
731    /**
732     * Prints to the specified output.
733     *
734     * <p>
735     * See also {@link CSVPrinter}.
736     * </p>
737     *
738     * @param out
739     *            the output
740     * @return a printer to an output
741     * @throws IOException
742     *             thrown if the optional header cannot be printed.
743     */
744    public CSVPrinter print(final Appendable out) throws IOException {
745        return new CSVPrinter(out, this);
746    }
747
748    @Override
749    public String toString() {
750        final StringBuilder sb = new StringBuilder();
751        sb.append("Delimiter=<").append(delimiter).append('>');
752        if (isEscapeCharacterSet()) {
753            sb.append(' ');
754            sb.append("Escape=<").append(escapeCharacter).append('>');
755        }
756        if (isQuoteCharacterSet()) {
757            sb.append(' ');
758            sb.append("QuoteChar=<").append(quoteCharacter).append('>');
759        }
760        if (isCommentMarkerSet()) {
761            sb.append(' ');
762            sb.append("CommentStart=<").append(commentMarker).append('>');
763        }
764        if (isNullStringSet()) {
765            sb.append(' ');
766            sb.append("NullString=<").append(nullString).append('>');
767        }
768        if (recordSeparator != null) {
769            sb.append(' ');
770            sb.append("RecordSeparator=<").append(recordSeparator).append('>');
771        }
772        if (getIgnoreEmptyLines()) {
773            sb.append(" EmptyLines:ignored");
774        }
775        if (getIgnoreSurroundingSpaces()) {
776            sb.append(" SurroundingSpaces:ignored");
777        }
778        sb.append(" SkipHeaderRecord:").append(skipHeaderRecord);
779        if (headerComments != null) {
780            sb.append(' ');
781            sb.append("HeaderComments:").append(Arrays.toString(headerComments));
782        }
783        if (header != null) {
784            sb.append(' ');
785            sb.append("Header:").append(Arrays.toString(header));
786        }
787        return sb.toString();
788    }
789
790    /**
791     * Verifies the consistency of the parameters and throws an IllegalArgumentException if necessary.
792     *
793     * @throws IllegalArgumentException
794     */
795    private void validate() throws IllegalArgumentException {
796        if (isLineBreak(delimiter)) {
797            throw new IllegalArgumentException("The delimiter cannot be a line break");
798        }
799        
800        if (quoteCharacter != null && delimiter == quoteCharacter.charValue()) {
801            throw new IllegalArgumentException("The quoteChar character and the delimiter cannot be the same ('" +
802                    quoteCharacter + "')");
803        }
804
805        if (escapeCharacter != null && delimiter == escapeCharacter.charValue()) {
806            throw new IllegalArgumentException("The escape character and the delimiter cannot be the same ('" +
807                    escapeCharacter + "')");
808        }
809
810        if (commentMarker != null && delimiter == commentMarker.charValue()) {
811            throw new IllegalArgumentException("The comment start character and the delimiter cannot be the same ('" +
812                    commentMarker + "')");
813        }
814
815        if (quoteCharacter != null && quoteCharacter.equals(commentMarker)) {
816            throw new IllegalArgumentException("The comment start character and the quoteChar cannot be the same ('" +
817                    commentMarker + "')");
818        }
819
820        if (escapeCharacter != null && escapeCharacter.equals(commentMarker)) {
821            throw new IllegalArgumentException("The comment start and the escape character cannot be the same ('" +
822                    commentMarker + "')");
823        }
824
825        if (escapeCharacter == null && quoteMode == QuoteMode.NONE) {
826            throw new IllegalArgumentException("No quotes mode set but no escape character is set");
827        }
828        
829        // validate header
830        if (header != null) {
831            final Set<String> dupCheck = new HashSet<String>();
832            for (final String hdr : header) {
833                if (!dupCheck.add(hdr)) {
834                    throw new IllegalArgumentException("The header contains a duplicate entry: '" + hdr + "' in " +
835                            Arrays.toString(header));
836                }
837            }
838        }
839    }
840
841    /**
842     * Sets the comment start marker of the format to the specified character.
843     *
844     * Note that the comment start character is only recognized at the start of a line.
845     *
846     * @param commentMarker
847     *            the comment start marker
848     * @return A new CSVFormat that is equal to this one but with the specified character as the comment start marker
849     * @throws IllegalArgumentException
850     *             thrown if the specified character is a line break
851     */
852    public CSVFormat withCommentMarker(final char commentMarker) {
853        return withCommentMarker(Character.valueOf(commentMarker));
854    }
855
856    /**
857     * Sets the comment start marker of the format to the specified character.
858     *
859     * Note that the comment start character is only recognized at the start of a line.
860     *
861     * @param commentMarker
862     *            the comment start marker, use {@code null} to disable
863     * @return A new CSVFormat that is equal to this one but with the specified character as the comment start marker
864     * @throws IllegalArgumentException
865     *             thrown if the specified character is a line break
866     */
867    public CSVFormat withCommentMarker(final Character commentMarker) {
868        if (isLineBreak(commentMarker)) {
869            throw new IllegalArgumentException("The comment start marker character cannot be a line break");
870        }
871        return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
872                ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
873                skipHeaderRecord, allowMissingColumnNames);
874    }
875
876    /**
877     * Sets the delimiter of the format to the specified character.
878     *
879     * @param delimiter
880     *            the delimiter character
881     * @return A new CSVFormat that is equal to this with the specified character as delimiter
882     * @throws IllegalArgumentException
883     *             thrown if the specified character is a line break
884     */
885    public CSVFormat withDelimiter(final char delimiter) {
886        if (isLineBreak(delimiter)) {
887            throw new IllegalArgumentException("The delimiter cannot be a line break");
888        }
889        return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
890                ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
891                skipHeaderRecord, allowMissingColumnNames);
892    }
893
894    /**
895     * Sets the escape character of the format to the specified character.
896     *
897     * @param escape
898     *            the escape character
899     * @return A new CSVFormat that is equal to his but with the specified character as the escape character
900     * @throws IllegalArgumentException
901     *             thrown if the specified character is a line break
902     */
903    public CSVFormat withEscape(final char escape) {
904        return withEscape(Character.valueOf(escape));
905    }
906
907    /**
908     * Sets the escape character of the format to the specified character.
909     *
910     * @param escape
911     *            the escape character, use {@code null} to disable
912     * @return A new CSVFormat that is equal to this but with the specified character as the escape character
913     * @throws IllegalArgumentException
914     *             thrown if the specified character is a line break
915     */
916    public CSVFormat withEscape(final Character escape) {
917        if (isLineBreak(escape)) {
918            throw new IllegalArgumentException("The escape character cannot be a line break");
919        }
920        return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escape, ignoreSurroundingSpaces,
921                ignoreEmptyLines, recordSeparator, nullString, headerComments, header, skipHeaderRecord,
922                allowMissingColumnNames);
923    }
924
925    /**
926     * Sets the header of the format. The header can either be parsed automatically from the input file with:
927     *
928     * <pre>
929     * CSVFormat format = aformat.withHeader();
930     * </pre>
931     *
932     * or specified manually with:
933     *
934     * <pre>
935     * CSVFormat format = aformat.withHeader(&quot;name&quot;, &quot;email&quot;, &quot;phone&quot;);
936     * </pre>
937     * <p>
938     * The header is also used by the {@link CSVPrinter}..
939     * </p>
940     *
941     * @param header
942     *            the header, {@code null} if disabled, empty if parsed automatically, user specified otherwise.
943     *
944     * @return A new CSVFormat that is equal to this but with the specified header
945     * @see #withSkipHeaderRecord(boolean)
946     */
947    public CSVFormat withHeader(final String... header) {
948        return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
949                ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
950                skipHeaderRecord, allowMissingColumnNames);
951    }
952
953    /**
954     * Sets the header of the format. The header can either be parsed automatically from the input file with:
955     *
956     * <pre>
957     * CSVFormat format = aformat.withHeader();
958     * </pre>
959     *
960     * or specified manually with:
961     *
962     * <pre>
963     * CSVFormat format = aformat.withHeader(resultSet);
964     * </pre>
965     * <p>
966     * The header is also used by the {@link CSVPrinter}..
967     * </p>
968     *
969     * @param resultSet
970     *            the resultSet for the header, {@code null} if disabled, empty if parsed automatically, user specified
971     *            otherwise.
972     *
973     * @return A new CSVFormat that is equal to this but with the specified header
974     * @throws SQLException
975     *             SQLException if a database access error occurs or this method is called on a closed result set.
976     * @since 1.1
977     */
978    public CSVFormat withHeader(final ResultSet resultSet) throws SQLException {
979        return withHeader(resultSet != null ? resultSet.getMetaData() : null);
980    }
981
982    /**
983     * Sets the header of the format. The header can either be parsed automatically from the input file with:
984     *
985     * <pre>
986     * CSVFormat format = aformat.withHeader();
987     * </pre>
988     *
989     * or specified manually with:
990     *
991     * <pre>
992     * CSVFormat format = aformat.withHeader(metaData);
993     * </pre>
994     * <p>
995     * The header is also used by the {@link CSVPrinter}..
996     * </p>
997     *
998     * @param metaData
999     *            the metaData for the header, {@code null} if disabled, empty if parsed automatically, user specified
1000     *            otherwise.
1001     *
1002     * @return A new CSVFormat that is equal to this but with the specified header
1003     * @throws SQLException
1004     *             SQLException if a database access error occurs or this method is called on a closed result set.
1005     * @since 1.1
1006     */
1007    public CSVFormat withHeader(final ResultSetMetaData metaData) throws SQLException {
1008        String[] labels = null;
1009        if (metaData != null) {
1010            final int columnCount = metaData.getColumnCount();
1011            labels = new String[columnCount];
1012            for (int i = 0; i < columnCount; i++) {
1013                labels[i] = metaData.getColumnLabel(i + 1);
1014            }
1015        }
1016        return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
1017                ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, labels,
1018                skipHeaderRecord, allowMissingColumnNames);
1019    }
1020
1021    /**
1022     * Sets the header comments of the format. The comments will be printed first, before the headers. This setting is
1023     * ignored by the parser.
1024     *
1025     * <pre>
1026     * CSVFormat format = aformat.withHeaderComments(&quot;Generated by Apache Commons CSV 1.1.&quot;, new Date());
1027     * </pre>
1028     *
1029     * @param headerComments
1030     *            the headerComments which will be printed by the Printer before the actual CSV data.
1031     *
1032     * @return A new CSVFormat that is equal to this but with the specified header
1033     * @see #withSkipHeaderRecord(boolean)
1034     * @since 1.1
1035     */
1036    public CSVFormat withHeaderComments(final Object... headerComments) {
1037        return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
1038                ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
1039                skipHeaderRecord, allowMissingColumnNames);
1040    }
1041
1042    /**
1043     * Sets the missing column names behavior of the format to {@code true}
1044     *
1045     * @return A new CSVFormat that is equal to this but with the specified missing column names behavior.
1046     * @see #withAllowMissingColumnNames(boolean)
1047     * @since 1.1
1048     */
1049    public CSVFormat withAllowMissingColumnNames() {
1050        return this.withAllowMissingColumnNames(true);
1051    }
1052
1053    /**
1054     * Sets the missing column names behavior of the format.
1055     *
1056     * @param allowMissingColumnNames
1057     *            the missing column names behavior, {@code true} to allow missing column names in the header line,
1058     *            {@code false} to cause an {@link IllegalArgumentException} to be thrown.
1059     * @return A new CSVFormat that is equal to this but with the specified missing column names behavior.
1060     */
1061    public CSVFormat withAllowMissingColumnNames(final boolean allowMissingColumnNames) {
1062        return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
1063                ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
1064                skipHeaderRecord, allowMissingColumnNames);
1065    }
1066
1067    /**
1068     * Sets the empty line skipping behavior of the format to {@code true}.
1069     *
1070     * @return A new CSVFormat that is equal to this but with the specified empty line skipping behavior.
1071     * @since {@link #withIgnoreEmptyLines(boolean)}
1072     * @since 1.1
1073     */
1074    public CSVFormat withIgnoreEmptyLines() {
1075        return this.withIgnoreEmptyLines(true);
1076    }
1077
1078    /**
1079     * Sets the empty line skipping behavior of the format.
1080     *
1081     * @param ignoreEmptyLines
1082     *            the empty line skipping behavior, {@code true} to ignore the empty lines between the records,
1083     *            {@code false} to translate empty lines to empty records.
1084     * @return A new CSVFormat that is equal to this but with the specified empty line skipping behavior.
1085     */
1086    public CSVFormat withIgnoreEmptyLines(final boolean ignoreEmptyLines) {
1087        return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
1088                ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
1089                skipHeaderRecord, allowMissingColumnNames);
1090    }
1091
1092    /**
1093     * Sets the trimming behavior of the format to {@code true}.
1094     *
1095     * @return A new CSVFormat that is equal to this but with the specified trimming behavior.
1096     * @see #withIgnoreSurroundingSpaces(boolean)
1097     * @since 1.1
1098     */
1099    public CSVFormat withIgnoreSurroundingSpaces() {
1100        return this.withIgnoreSurroundingSpaces(true);
1101    }
1102
1103    /**
1104     * Sets the trimming behavior of the format.
1105     *
1106     * @param ignoreSurroundingSpaces
1107     *            the trimming behavior, {@code true} to remove the surrounding spaces, {@code false} to leave the
1108     *            spaces as is.
1109     * @return A new CSVFormat that is equal to this but with the specified trimming behavior.
1110     */
1111    public CSVFormat withIgnoreSurroundingSpaces(final boolean ignoreSurroundingSpaces) {
1112        return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
1113                ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
1114                skipHeaderRecord, allowMissingColumnNames);
1115    }
1116
1117    /**
1118     * Performs conversions to and from null for strings on input and output.
1119     * <ul>
1120     * <li>
1121     * <strong>Reading:</strong> Converts strings equal to the given {@code nullString} to {@code null} when reading
1122     * records.</li>
1123     * <li>
1124     * <strong>Writing:</strong> Writes {@code null} as the given {@code nullString} when writing records.</li>
1125     * </ul>
1126     *
1127     * @param nullString
1128     *            the String to convert to and from {@code null}. No substitution occurs if {@code null}
1129     *
1130     * @return A new CSVFormat that is equal to this but with the specified null conversion string.
1131     */
1132    public CSVFormat withNullString(final String nullString) {
1133        return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
1134                ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
1135                skipHeaderRecord, allowMissingColumnNames);
1136    }
1137
1138    /**
1139     * Sets the quoteChar of the format to the specified character.
1140     *
1141     * @param quoteChar
1142     *            the quoteChar character
1143     * @return A new CSVFormat that is equal to this but with the specified character as quoteChar
1144     * @throws IllegalArgumentException
1145     *             thrown if the specified character is a line break
1146     */
1147    public CSVFormat withQuote(final char quoteChar) {
1148        return withQuote(Character.valueOf(quoteChar));
1149    }
1150
1151    /**
1152     * Sets the quoteChar of the format to the specified character.
1153     *
1154     * @param quoteChar
1155     *            the quoteChar character, use {@code null} to disable
1156     * @return A new CSVFormat that is equal to this but with the specified character as quoteChar
1157     * @throws IllegalArgumentException
1158     *             thrown if the specified character is a line break
1159     */
1160    public CSVFormat withQuote(final Character quoteChar) {
1161        if (isLineBreak(quoteChar)) {
1162            throw new IllegalArgumentException("The quoteChar cannot be a line break");
1163        }
1164        return new CSVFormat(delimiter, quoteChar, quoteMode, commentMarker, escapeCharacter, ignoreSurroundingSpaces,
1165                ignoreEmptyLines, recordSeparator, nullString, headerComments, header, skipHeaderRecord,
1166                allowMissingColumnNames);
1167    }
1168
1169    /**
1170     * Sets the output quote policy of the format to the specified value.
1171     *
1172     * @param quoteModePolicy
1173     *            the quote policy to use for output.
1174     *
1175     * @return A new CSVFormat that is equal to this but with the specified quote policy
1176     */
1177    public CSVFormat withQuoteMode(final QuoteMode quoteModePolicy) {
1178        return new CSVFormat(delimiter, quoteCharacter, quoteModePolicy, commentMarker, escapeCharacter,
1179                ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
1180                skipHeaderRecord, allowMissingColumnNames);
1181    }
1182
1183    /**
1184     * Sets the record separator of the format to the specified character.
1185     *
1186     * <p>
1187     * <strong>Note:</strong> This setting is only used during printing and does not affect parsing. Parsing currently
1188     * only works for inputs with '\n', '\r' and "\r\n"
1189     * </p>
1190     *
1191     * @param recordSeparator
1192     *            the record separator to use for output.
1193     *
1194     * @return A new CSVFormat that is equal to this but with the the specified output record separator
1195     */
1196    public CSVFormat withRecordSeparator(final char recordSeparator) {
1197        return withRecordSeparator(String.valueOf(recordSeparator));
1198    }
1199
1200    /**
1201     * Sets the record separator of the format to the specified String.
1202     *
1203     * <p>
1204     * <strong>Note:</strong> This setting is only used during printing and does not affect parsing. Parsing currently
1205     * only works for inputs with '\n', '\r' and "\r\n"
1206     * </p>
1207     *
1208     * @param recordSeparator
1209     *            the record separator to use for output.
1210     *
1211     * @return A new CSVFormat that is equal to this but with the the specified output record separator
1212     * @throws IllegalArgumentException
1213     *             if recordSeparator is none of CR, LF or CRLF
1214     */
1215    public CSVFormat withRecordSeparator(final String recordSeparator) {
1216        return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
1217                ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
1218                skipHeaderRecord, allowMissingColumnNames);
1219    }
1220
1221    /**
1222     * Sets skipping the header record to {@code true}.
1223     *
1224     * @return A new CSVFormat that is equal to this but with the the specified skipHeaderRecord setting.
1225     * @see #withSkipHeaderRecord(boolean)
1226     * @see #withHeader(String...)
1227     * @since 1.1
1228     */
1229    public CSVFormat withSkipHeaderRecord() {
1230        return this.withSkipHeaderRecord(true);
1231    }
1232
1233    /**
1234     * Sets whether to skip the header record.
1235     *
1236     * @param skipHeaderRecord
1237     *            whether to skip the header record.
1238     *
1239     * @return A new CSVFormat that is equal to this but with the the specified skipHeaderRecord setting.
1240     * @see #withHeader(String...)
1241     */
1242    public CSVFormat withSkipHeaderRecord(final boolean skipHeaderRecord) {
1243        return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
1244                ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
1245                skipHeaderRecord, allowMissingColumnNames);
1246    }
1247}