001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.csv;
019
020import static org.apache.commons.csv.Constants.BACKSLASH;
021import static org.apache.commons.csv.Constants.COMMA;
022import static org.apache.commons.csv.Constants.CR;
023import static org.apache.commons.csv.Constants.CRLF;
024import static org.apache.commons.csv.Constants.DOUBLE_QUOTE_CHAR;
025import static org.apache.commons.csv.Constants.LF;
026import static org.apache.commons.csv.Constants.TAB;
027
028import java.io.IOException;
029import java.io.Reader;
030import java.io.Serializable;
031import java.io.StringWriter;
032import java.util.Arrays;
033import java.util.HashSet;
034import java.util.Set;
035
036/**
037 * Specifies the format of a CSV file and parses input.
038 *
039 * <h2>Using predefined formats</h2>
040 *
041 * <p>
042 * You can use one of the predefined formats:
043 * </p>
044 *
045 * <ul>
046 *      <li>{@link #DEFAULT}</li>
047 *      <li>{@link #EXCEL}</li>
048 *      <li>{@link #MYSQL}</li>
049 *      <li>{@link #RFC4180}</li>
050 *      <li>{@link #TDF}</li>
051 * </ul>
052 *
053 * <p>
054 * For example:
055 * </p>
056 *
057 * <pre>
058 * CSVParser parser = CSVFormat.EXCEL.parse(reader);
059 * </pre>
060 *
061 * <p>
062 * The {@link CSVParser} provides static methods to parse other input types, for example:
063 * </p>
064 *
065 * <pre>CSVParser parser = CSVParser.parse(file, StandardCharsets.US_ASCII, CSVFormat.EXCEL);</pre>
066 *
067 * <h2>Defining formats</h2>
068 *
069 * <p>
070 * You can extend a format by calling the {@code with} methods. For example:
071 * </p>
072 *
073 * <pre>
074 * CSVFormat.EXCEL
075 *   .withNullString(&quot;N/A&quot;)
076 *   .withIgnoreSurroundingSpaces(true);
077 * </pre>
078 *
079 * <h2>Defining column names</h2>
080 *
081 * <p>
082 * To define the column names you want to use to access records, write:
083 * </p>
084 *
085 * <pre>
086 * CSVFormat.EXCEL.withHeader(&quot;Col1&quot;, &quot;Col2&quot;, &quot;Col3&quot;);
087 * </pre>
088 *
089 * <p>
 * Calling {@link #withHeader(String...)} lets you use the given names to address values in a {@link CSVRecord}, and
091 * assumes that your CSV source does not contain a first record that also defines column names.
092 *
093 * If it does, then you are overriding this metadata with your names and you should skip the first record by calling
094 * {@link #withSkipHeaderRecord(boolean)} with {@code true}.
095 * </p>
096 *
097 * <h2>Parsing</h2>
098 *
099 * <p>
100 * You can use a format directly to parse a reader. For example, to parse an Excel file with columns header, write:
101 * </p>
102 *
103 * <pre>
104 * Reader in = ...;
105 * CSVFormat.EXCEL.withHeader(&quot;Col1&quot;, &quot;Col2&quot;, &quot;Col3&quot;).parse(in);
106 * </pre>
107 *
108 * <p>
109 * For other input types, like resources, files, and URLs, use the static methods on {@link CSVParser}.
110 * </p>
111 *
112 * <h2>Referencing columns safely</h2>
113 *
114 * <p>
115 * If your source contains a header record, you can simplify your code and safely reference columns,
116 * by using {@link #withHeader(String...)} with no arguments:
117 * </p>
118 *
119 * <pre>
120 * CSVFormat.EXCEL.withHeader();
121 * </pre>
122 *
123 * <p>
124 * This causes the parser to read the first record and use its values as column names.
125 *
 * Then, call one of the {@link CSVRecord} get methods that take a String column name argument:
127 * </p>
128 *
129 * <pre>
130 * String value = record.get(&quot;Col1&quot;);
131 * </pre>
132 *
133 * <p>
134 * This makes your code impervious to changes in column order in the CSV file.
135 * </p>
136 *
137 * <h2>Notes</h2>
138 *
139 * <p>
140 * This class is immutable.
141 * </p>
142 *
143 * @version $Id: CSVFormat.java 1617076 2014-08-10 09:23:01Z britter $
144 */
145public final class CSVFormat implements Serializable {
146
147    private static final long serialVersionUID = 1L;
148
149    private final char delimiter;
150    private final Character quoteCharacter; // null if quoting is disabled
151    private final QuoteMode quoteMode;
152    private final Character commentMarker; // null if commenting is disabled
153    private final Character escapeCharacter; // null if escaping is disabled
154    private final boolean ignoreSurroundingSpaces; // Should leading/trailing spaces be ignored around values?
155    private final boolean allowMissingColumnNames;
156    private final boolean ignoreEmptyLines;
157    private final String recordSeparator; // for outputs
158    private final String nullString; // the string to be used for null values
159    private final String[] header; // array of header column names
160    private final boolean skipHeaderRecord;
161
162    /**
163     * Standard comma separated format, as for {@link #RFC4180} but allowing empty lines.
164     *
165     * <p>
166     * Settings are:
167     * </p>
168     * <ul>
169     *   <li>withDelimiter(',')</li>
170     *   <li>withQuoteChar('"')</li>
171     *   <li>withRecordSeparator("\r\n")</li>
172     *   <li>withIgnoreEmptyLines(true)</li>
173     * </ul>
174     */
175    public static final CSVFormat DEFAULT = new CSVFormat(COMMA, DOUBLE_QUOTE_CHAR, null, null, null,
176                                                            false, true, CRLF, null, null, false, false);
177
178    /**
179     * Comma separated format as defined by <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>.
180     *
181     * <p>
182     * Settings are:
183     * </p>
184     * <ul>
185     *   <li>withDelimiter(',')</li>
186     *   <li>withQuoteChar('"')</li>
187     *   <li>withRecordSeparator("\r\n")</li>
188     *   <li>withIgnoreEmptyLines(false)</li>
189     * </ul>
190     */
191    public static final CSVFormat RFC4180 = DEFAULT.withIgnoreEmptyLines(false);
192
193    /**
194     * Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is
195     * locale dependent, it might be necessary to customize this format to accommodate to your regional settings.
196     *
197     * <p>
198     * For example for parsing or generating a CSV file on a French system the following format will be used:
199     * </p>
200     *
201     * <pre>
202     * CSVFormat fmt = CSVFormat.EXCEL.withDelimiter(';');
203     * </pre>
204     *
205     * <p>
206     * Settings are:
207     * </p>
208     * <ul>
209     *   <li>withDelimiter(',')</li>
210     *   <li>withQuoteChar('"')</li>
211     *   <li>withRecordSeparator("\r\n")</li>
212     *   <li>withIgnoreEmptyLines(false)</li>
213     * </ul>
214     * <p>
215     * Note: this is currently the same as {@link #RFC4180}.
216     * </p>
217     */
218    public static final CSVFormat EXCEL = DEFAULT.withIgnoreEmptyLines(false);
219
220    /**
221     * Tab-delimited format.
222     *
223     * <p>
224     * Settings are:
225     * </p>
226     * <ul>
227     *   <li>withDelimiter('\t')</li>
228     *   <li>withQuoteChar('"')</li>
229     *   <li>withRecordSeparator("\r\n")</li>
230     *   <li>withIgnoreSurroundingSpaces(true)</li>
231     * </ul>
232     */
233    public static final CSVFormat TDF =
234            DEFAULT
235            .withDelimiter(TAB)
236            .withIgnoreSurroundingSpaces(true);
237
238    /**
239     * Default MySQL format used by the {@code SELECT INTO OUTFILE} and {@code LOAD DATA INFILE} operations.
240     *
241     * <p>
242     * This is a tab-delimited format with a LF character as the line separator. Values are not quoted and special
243     * characters are escaped with '\'.
244     * </p>
245     *
246     * <p>
247     * Settings are:
248     * </p>
249     * <ul>
250     *   <li>withDelimiter('\t')</li>
251     *   <li>withQuoteChar(null)</li>
252     *   <li>withRecordSeparator('\n')</li>
253     *   <li>withIgnoreEmptyLines(false)</li>
254     *   <li>withEscape('\\')</li>
255     * </ul>
256     * @see <a href="http://dev.mysql.com/doc/refman/5.1/en/load-data.html">
257     *      http://dev.mysql.com/doc/refman/5.1/en/load-data.html</a>
258     */
259    public static final CSVFormat MYSQL =
260            DEFAULT
261            .withDelimiter(TAB)
262            .withEscape(BACKSLASH)
263            .withIgnoreEmptyLines(false)
264            .withQuote(null)
265            .withRecordSeparator(LF);
266
267    /**
268     * Returns true if the given character is a line break character.
269     *
270     * @param c
271     *            the character to check
272     *
273     * @return true if <code>c</code> is a line break character
274     */
275    private static boolean isLineBreak(final char c) {
276        return c == LF || c == CR;
277    }
278
279    /**
280     * Returns true if the given character is a line break character.
281     *
282     * @param c
283     *            the character to check, may be null
284     *
285     * @return true if <code>c</code> is a line break character (and not null)
286     */
287    private static boolean isLineBreak(final Character c) {
288        return c != null && isLineBreak(c.charValue());
289    }
290
291    /**
292     * Creates a new CSV format with the specified delimiter.
293     *
294     * <p>Use this method if you want to create a CSVFormat from scratch. All fields but the delimiter will be
295     * initialized with null/false.</p>
296     *
297     * @param delimiter
298     *            the char used for value separation, must not be a line break character
299     * @return a new CSV format.
300     * @throws IllegalArgumentException if the delimiter is a line break character
301     *
302     * @see #DEFAULT
303     * @see #RFC4180
304     * @see #MYSQL
305     * @see #EXCEL
306     * @see #TDF
307     */
308    public static CSVFormat newFormat(final char delimiter) {
309        return new CSVFormat(delimiter, null, null, null, null, false, false, null, null, null, false, false);
310    }
311
312    /**
313     * Creates a customized CSV format.
314     *
315     * @param delimiter
316     *            the char used for value separation, must not be a line break character
317     * @param quoteChar
318     *            the Character used as value encapsulation marker, may be {@code null} to disable
319     * @param quoteMode
320     *            the quote mode
321     * @param commentStart
322     *            the Character used for comment identification, may be {@code null} to disable
323     * @param escape
324     *            the Character used to escape special characters in values, may be {@code null} to disable
325     * @param ignoreSurroundingSpaces
326     *            {@code true} when whitespaces enclosing values should be ignored
327     * @param ignoreEmptyLines
328     *            {@code true} when the parser should skip empty lines
329     * @param recordSeparator
330     *            the line separator to use for output
331     * @param nullString
332     *            the line separator to use for output
333     * @param header
334     *            the header
335     * @param skipHeaderRecord TODO
336     * @param allowMissingColumnNames TODO
337     * @throws IllegalArgumentException if the delimiter is a line break character
338     */
339    private CSVFormat(final char delimiter, final Character quoteChar,
340            final QuoteMode quoteMode, final Character commentStart,
341            final Character escape, final boolean ignoreSurroundingSpaces,
342            final boolean ignoreEmptyLines, final String recordSeparator,
343            final String nullString, final String[] header, final boolean skipHeaderRecord,
344            final boolean allowMissingColumnNames) {
345        if (isLineBreak(delimiter)) {
346            throw new IllegalArgumentException("The delimiter cannot be a line break");
347        }
348        this.delimiter = delimiter;
349        this.quoteCharacter = quoteChar;
350        this.quoteMode = quoteMode;
351        this.commentMarker = commentStart;
352        this.escapeCharacter = escape;
353        this.ignoreSurroundingSpaces = ignoreSurroundingSpaces;
354        this.allowMissingColumnNames = allowMissingColumnNames;
355        this.ignoreEmptyLines = ignoreEmptyLines;
356        this.recordSeparator = recordSeparator;
357        this.nullString = nullString;
358        if (header == null) {
359            this.header = null;
360        } else {
361            final Set<String> dupCheck = new HashSet<String>();
362            for (final String hdr : header) {
363                if (!dupCheck.add(hdr)) {
364                    throw new IllegalArgumentException("The header contains a duplicate entry: '" + hdr + "' in " +
365                            Arrays.toString(header));
366                }
367            }
368            this.header = header.clone();
369        }
370        this.skipHeaderRecord = skipHeaderRecord;
371        validate();
372    }
373
374    @Override
375    public boolean equals(final Object obj) {
376        if (this == obj) {
377            return true;
378        }
379        if (obj == null) {
380            return false;
381        }
382        if (getClass() != obj.getClass()) {
383            return false;
384        }
385
386        final CSVFormat other = (CSVFormat) obj;
387        if (delimiter != other.delimiter) {
388            return false;
389        }
390        if (quoteMode != other.quoteMode) {
391            return false;
392        }
393        if (quoteCharacter == null) {
394            if (other.quoteCharacter != null) {
395                return false;
396            }
397        } else if (!quoteCharacter.equals(other.quoteCharacter)) {
398            return false;
399        }
400        if (commentMarker == null) {
401            if (other.commentMarker != null) {
402                return false;
403            }
404        } else if (!commentMarker.equals(other.commentMarker)) {
405            return false;
406        }
407        if (escapeCharacter == null) {
408            if (other.escapeCharacter != null) {
409                return false;
410            }
411        } else if (!escapeCharacter.equals(other.escapeCharacter)) {
412            return false;
413        }
414        if (nullString == null) {
415            if (other.nullString != null) {
416                return false;
417            }
418        } else if (!nullString.equals(other.nullString)) {
419            return false;
420        }
421        if (!Arrays.equals(header, other.header)) {
422            return false;
423        }
424        if (ignoreSurroundingSpaces != other.ignoreSurroundingSpaces) {
425            return false;
426        }
427        if (ignoreEmptyLines != other.ignoreEmptyLines) {
428            return false;
429        }
430        if (skipHeaderRecord != other.skipHeaderRecord) {
431            return false;
432        }
433        if (recordSeparator == null) {
434            if (other.recordSeparator != null) {
435                return false;
436            }
437        } else if (!recordSeparator.equals(other.recordSeparator)) {
438            return false;
439        }
440        return true;
441    }
442
443    /**
444     * Formats the specified values.
445     *
446     * @param values
447     *            the values to format
448     * @return the formatted values
449     */
450    public String format(final Object... values) {
451        final StringWriter out = new StringWriter();
452        try {
453            new CSVPrinter(out, this).printRecord(values);
454            return out.toString().trim();
455        } catch (final IOException e) {
456            // should not happen because a StringWriter does not do IO.
457            throw new IllegalStateException(e);
458        }
459    }
460
461    /**
462     * Returns the character marking the start of a line comment.
463     *
464     * @return the comment start marker, may be {@code null}
465     */
466    public Character getCommentMarker() {
467        return commentMarker;
468    }
469
470    /**
471     * Returns the character delimiting the values (typically ';', ',' or '\t').
472     *
473     * @return the delimiter character
474     */
475    public char getDelimiter() {
476        return delimiter;
477    }
478
479    /**
480     * Returns the escape character.
481     *
482     * @return the escape character, may be {@code null}
483     */
484    public Character getEscapeCharacter() {
485        return escapeCharacter;
486    }
487
488    /**
489     * Returns a copy of the header array.
490     *
491     * @return a copy of the header array; {@code null} if disabled, the empty array if to be read from the file
492     */
493    public String[] getHeader() {
494        return header != null ? header.clone() : null;
495    }
496
497    /**
498     * Specifies whether missing column names are allowed when parsing the header line.
499     *
500     * @return {@code true} if missing column names are allowed when parsing the header line, {@code false} to throw an
501     *         {@link IllegalArgumentException}.
502     */
503    public boolean getAllowMissingColumnNames() {
504        return allowMissingColumnNames;
505    }
506
507    /**
508     * Specifies whether empty lines between records are ignored when parsing input.
509     *
510     * @return {@code true} if empty lines between records are ignored, {@code false} if they are turned into empty
511     *         records.
512     */
513    public boolean getIgnoreEmptyLines() {
514        return ignoreEmptyLines;
515    }
516
517    /**
518     * Specifies whether spaces around values are ignored when parsing input.
519     *
520     * @return {@code true} if spaces around values are ignored, {@code false} if they are treated as part of the
521     *         value.
522     */
523    public boolean getIgnoreSurroundingSpaces() {
524        return ignoreSurroundingSpaces;
525    }
526
527    /**
528     * Gets the String to convert to and from {@code null}.
529     * <ul>
530     * <li>
531     * <strong>Reading:</strong> Converts strings equal to the given {@code nullString} to {@code null} when reading
532     * records.
533     * </li>
534     * <li>
535     * <strong>Writing:</strong> Writes {@code null} as the given {@code nullString} when writing records.</li>
536     * </ul>
537     *
538     * @return the String to convert to and from {@code null}. No substitution occurs if {@code null}
539     */
540    public String getNullString() {
541        return nullString;
542    }
543
544    /**
545     * Returns the character used to encapsulate values containing special characters.
546     *
547     * @return the quoteChar character, may be {@code null}
548     */
549    public Character getQuoteCharacter() {
550        return quoteCharacter;
551    }
552
553    /**
554     * Returns the quote policy output fields.
555     *
556     * @return the quote policy
557     */
558    public QuoteMode getQuoteMode() {
559        return quoteMode;
560    }
561
562    /**
563     * Returns the record separator delimiting output records.
564     *
565     * @return the record separator
566     */
567    public String getRecordSeparator() {
568        return recordSeparator;
569    }
570
571    /**
572     * Returns whether to skip the header record.
573     *
574     * @return whether to skip the header record.
575     */
576    public boolean getSkipHeaderRecord() {
577        return skipHeaderRecord;
578    }
579
580    @Override
581    public int hashCode()
582    {
583        final int prime = 31;
584        int result = 1;
585
586        result = prime * result + delimiter;
587        result = prime * result + ((quoteMode == null) ? 0 : quoteMode.hashCode());
588        result = prime * result + ((quoteCharacter == null) ? 0 : quoteCharacter.hashCode());
589        result = prime * result + ((commentMarker == null) ? 0 : commentMarker.hashCode());
590        result = prime * result + ((escapeCharacter == null) ? 0 : escapeCharacter.hashCode());
591        result = prime * result + ((nullString == null) ? 0 : nullString.hashCode());
592        result = prime * result + (ignoreSurroundingSpaces ? 1231 : 1237);
593        result = prime * result + (ignoreEmptyLines ? 1231 : 1237);
594        result = prime * result + (skipHeaderRecord ? 1231 : 1237);
595        result = prime * result + ((recordSeparator == null) ? 0 : recordSeparator.hashCode());
596        result = prime * result + Arrays.hashCode(header);
597        return result;
598    }
599
600    /**
601     * Specifies whether comments are supported by this format.
602     *
603     * Note that the comment introducer character is only recognized at the start of a line.
604     *
605     * @return {@code true} is comments are supported, {@code false} otherwise
606     */
607    public boolean isCommentMarkerSet() {
608        return commentMarker != null;
609    }
610
611    /**
612     * Returns whether escape are being processed.
613     *
614     * @return {@code true} if escapes are processed
615     */
616    public boolean isEscapeCharacterSet() {
617        return escapeCharacter != null;
618    }
619
620    /**
621     * Returns whether a nullString has been defined.
622     *
623     * @return {@code true} if a nullString is defined
624     */
625    public boolean isNullStringSet() {
626        return nullString != null;
627    }
628
629    /**
630     * Returns whether a quoteChar has been defined.
631     *
632     * @return {@code true} if a quoteChar is defined
633     */
634    public boolean isQuoteCharacterSet() {
635        return quoteCharacter != null;
636    }
637
638    /**
639     * Parses the specified content.
640     *
641     * <p>
642     * See also the various static parse methods on {@link CSVParser}.
643     * </p>
644     *
645     * @param in
646     *            the input stream
647     * @return a parser over a stream of {@link CSVRecord}s.
648     * @throws IOException
649     *             If an I/O error occurs
650     */
651    public CSVParser parse(final Reader in) throws IOException {
652        return new CSVParser(in, this);
653    }
654
655    /**
656     * Prints to the specified output.
657     *
658     * <p>
659     * See also {@link CSVPrinter}.
660     * </p>
661     *
662     * @param out
663     *        the output
664     * @return a printer to an output
665     * @throws IOException
666     *         thrown if the optional header cannot be printed.
667     */
668    public CSVPrinter print(final Appendable out) throws IOException {
669        return new CSVPrinter(out, this);
670    }
671
672    @Override
673    public String toString() {
674        final StringBuilder sb = new StringBuilder();
675        sb.append("Delimiter=<").append(delimiter).append('>');
676        if (isEscapeCharacterSet()) {
677            sb.append(' ');
678            sb.append("Escape=<").append(escapeCharacter).append('>');
679        }
680        if (isQuoteCharacterSet()) {
681            sb.append(' ');
682            sb.append("QuoteChar=<").append(quoteCharacter).append('>');
683        }
684        if (isCommentMarkerSet()) {
685            sb.append(' ');
686            sb.append("CommentStart=<").append(commentMarker).append('>');
687        }
688        if (isNullStringSet()) {
689            sb.append(' ');
690            sb.append("NullString=<").append(nullString).append('>');
691        }
692        if(recordSeparator != null) {
693            sb.append(' ');
694            sb.append("RecordSeparator=<").append(recordSeparator).append('>');
695        }
696        if (getIgnoreEmptyLines()) {
697            sb.append(" EmptyLines:ignored");
698        }
699        if (getIgnoreSurroundingSpaces()) {
700            sb.append(" SurroundingSpaces:ignored");
701        }
702        sb.append(" SkipHeaderRecord:").append(skipHeaderRecord);
703        if (header != null) {
704            sb.append(' ');
705            sb.append("Header:").append(Arrays.toString(header));
706        }
707        return sb.toString();
708    }
709
710    /**
711     * Verifies the consistency of the parameters and throws an IllegalArgumentException if necessary.
712     *
713     * @throws IllegalArgumentException
714     */
715    private void validate() throws IllegalArgumentException {
716        if (quoteCharacter != null && delimiter == quoteCharacter.charValue()) {
717            throw new IllegalArgumentException(
718                    "The quoteChar character and the delimiter cannot be the same ('" + quoteCharacter + "')");
719        }
720
721        if (escapeCharacter != null && delimiter == escapeCharacter.charValue()) {
722            throw new IllegalArgumentException(
723                    "The escape character and the delimiter cannot be the same ('" + escapeCharacter + "')");
724        }
725
726        if (commentMarker != null && delimiter == commentMarker.charValue()) {
727            throw new IllegalArgumentException(
728                    "The comment start character and the delimiter cannot be the same ('" + commentMarker + "')");
729        }
730
731        if (quoteCharacter != null && quoteCharacter.equals(commentMarker)) {
732            throw new IllegalArgumentException(
733                    "The comment start character and the quoteChar cannot be the same ('" + commentMarker + "')");
734        }
735
736        if (escapeCharacter != null && escapeCharacter.equals(commentMarker)) {
737            throw new IllegalArgumentException(
738                    "The comment start and the escape character cannot be the same ('" + commentMarker + "')");
739        }
740
741        if (escapeCharacter == null && quoteMode == QuoteMode.NONE) {
742            throw new IllegalArgumentException("No quotes mode set but no escape character is set");
743        }
744    }
745
746    /**
747     * Sets the comment start marker of the format to the specified character.
748     *
749     * Note that the comment start character is only recognized at the start of a line.
750     *
751     * @param commentMarker
752     *            the comment start marker
753     * @return A new CSVFormat that is equal to this one but with the specified character as the comment start marker
754     * @throws IllegalArgumentException
755     *             thrown if the specified character is a line break
756     */
757    public CSVFormat withCommentMarker(final char commentMarker) {
758        return withCommentMarker(Character.valueOf(commentMarker));
759    }
760
761    /**
762     * Sets the comment start marker of the format to the specified character.
763     *
764     * Note that the comment start character is only recognized at the start of a line.
765     *
766     * @param commentMarker
767     *            the comment start marker, use {@code null} to disable
768     * @return A new CSVFormat that is equal to this one but with the specified character as the comment start marker
769     * @throws IllegalArgumentException
770     *             thrown if the specified character is a line break
771     */
772    public CSVFormat withCommentMarker(final Character commentMarker) {
773        if (isLineBreak(commentMarker)) {
774            throw new IllegalArgumentException("The comment start marker character cannot be a line break");
775        }
776        return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
777                ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord,
778                allowMissingColumnNames);
779    }
780
781    /**
782     * Sets the delimiter of the format to the specified character.
783     *
784     * @param delimiter
785     *            the delimiter character
786     * @return A new CSVFormat that is equal to this with the specified character as delimiter
787     * @throws IllegalArgumentException
788     *             thrown if the specified character is a line break
789     */
790    public CSVFormat withDelimiter(final char delimiter) {
791        if (isLineBreak(delimiter)) {
792            throw new IllegalArgumentException("The delimiter cannot be a line break");
793        }
794        return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
795                ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord,
796                allowMissingColumnNames);
797    }
798
799    /**
800     * Sets the escape character of the format to the specified character.
801     *
802     * @param escape
803     *            the escape character
804     * @return A new CSVFormat that is equal to his but with the specified character as the escape character
805     * @throws IllegalArgumentException
806     *             thrown if the specified character is a line break
807     */
808    public CSVFormat withEscape(final char escape) {
809        return withEscape(Character.valueOf(escape));
810    }
811
812    /**
813     * Sets the escape character of the format to the specified character.
814     *
815     * @param escape
816     *            the escape character, use {@code null} to disable
817     * @return A new CSVFormat that is equal to this but with the specified character as the escape character
818     * @throws IllegalArgumentException
819     *             thrown if the specified character is a line break
820     */
821    public CSVFormat withEscape(final Character escape) {
822        if (isLineBreak(escape)) {
823            throw new IllegalArgumentException("The escape character cannot be a line break");
824        }
825        return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escape,
826                ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord,
827                allowMissingColumnNames);
828    }
829
830    /**
831     * Sets the header of the format. The header can either be parsed automatically from the input file with:
832     *
833     * <pre>
834     * CSVFormat format = aformat.withHeader();</pre>
835     *
836     * or specified manually with:
837     *
838     * <pre>
839     * CSVFormat format = aformat.withHeader(&quot;name&quot;, &quot;email&quot;, &quot;phone&quot;);</pre>
840     *
841     * @param header
842     *            the header, {@code null} if disabled, empty if parsed automatically, user specified otherwise.
843     *
844     * @return A new CSVFormat that is equal to this but with the specified header
845     * @see #withSkipHeaderRecord(boolean)
846     */
847    public CSVFormat withHeader(final String... header) {
848        return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
849                ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord,
850                allowMissingColumnNames);
851    }
852
853    /**
854     * Sets the missing column names behavior of the format.
855     *
856     * @param allowMissingColumnNames
857     *            the missing column names behavior, {@code true} to allow missing column names in the header line,
858     *            {@code false} to cause an {@link IllegalArgumentException} to be thrown.
859     * @return A new CSVFormat that is equal to this but with the specified missing column names behavior.
860     */
861    public CSVFormat withAllowMissingColumnNames(final boolean allowMissingColumnNames) {
862        return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
863                ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord,
864                allowMissingColumnNames);
865    }
866
867    /**
868     * Sets the empty line skipping behavior of the format.
869     *
870     * @param ignoreEmptyLines
871     *            the empty line skipping behavior, {@code true} to ignore the empty lines between the records,
872     *            {@code false} to translate empty lines to empty records.
873     * @return A new CSVFormat that is equal to this but with the specified empty line skipping behavior.
874     */
875    public CSVFormat withIgnoreEmptyLines(final boolean ignoreEmptyLines) {
876        return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
877                ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord,
878                allowMissingColumnNames);
879    }
880
881    /**
882     * Sets the trimming behavior of the format.
883     *
884     * @param ignoreSurroundingSpaces
885     *            the trimming behavior, {@code true} to remove the surrounding spaces, {@code false} to leave the
886     *            spaces as is.
887     * @return A new CSVFormat that is equal to this but with the specified trimming behavior.
888     */
889    public CSVFormat withIgnoreSurroundingSpaces(final boolean ignoreSurroundingSpaces) {
890        return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
891                ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord,
892                allowMissingColumnNames);
893    }
894
895    /**
896     * Performs conversions to and from null for strings on input and output.
897     * <ul>
898     * <li>
899     * <strong>Reading:</strong> Converts strings equal to the given {@code nullString} to {@code null} when reading
900     * records.</li>
901     * <li>
902     * <strong>Writing:</strong> Writes {@code null} as the given {@code nullString} when writing records.</li>
903     * </ul>
904     *
905     * @param nullString
906     *            the String to convert to and from {@code null}. No substitution occurs if {@code null}
907     *
908     * @return A new CSVFormat that is equal to this but with the specified null conversion string.
909     */
910    public CSVFormat withNullString(final String nullString) {
911        return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
912                ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord,
913                allowMissingColumnNames);
914    }
915
916    /**
917     * Sets the quoteChar of the format to the specified character.
918     *
919     * @param quoteChar
920     *            the quoteChar character
921     * @return A new CSVFormat that is equal to this but with the specified character as quoteChar
922     * @throws IllegalArgumentException
923     *             thrown if the specified character is a line break
924     */
925    public CSVFormat withQuote(final char quoteChar) {
926        return withQuote(Character.valueOf(quoteChar));
927    }
928
929    /**
930     * Sets the quoteChar of the format to the specified character.
931     *
932     * @param quoteChar
933     *            the quoteChar character, use {@code null} to disable
934     * @return A new CSVFormat that is equal to this but with the specified character as quoteChar
935     * @throws IllegalArgumentException
936     *             thrown if the specified character is a line break
937     */
938    public CSVFormat withQuote(final Character quoteChar) {
939        if (isLineBreak(quoteChar)) {
940            throw new IllegalArgumentException("The quoteChar cannot be a line break");
941        }
942        return new CSVFormat(delimiter, quoteChar, quoteMode, commentMarker, escapeCharacter,
943                ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord,
944                allowMissingColumnNames);
945    }
946
947    /**
948     * Sets the output quote policy of the format to the specified value.
949     *
950     * @param quoteModePolicy
951     *            the quote policy to use for output.
952     *
953     * @return A new CSVFormat that is equal to this but with the specified quote policy
954     */
955    public CSVFormat withQuoteMode(final QuoteMode quoteModePolicy) {
956        return new CSVFormat(delimiter, quoteCharacter, quoteModePolicy, commentMarker, escapeCharacter,
957                ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord,
958                allowMissingColumnNames);
959    }
960
961    /**
962     * Sets the record separator of the format to the specified character.
963     *
964     * <p><strong>Note:</strong> This setting is only used during printing and does not affect parsing. Parsing
965     * currently only works for inputs with '\n', '\r' and "\r\n"</p>
966     *
967     * @param recordSeparator
968     *            the record separator to use for output.
969     *
     * @return A new CSVFormat that is equal to this but with the specified output record separator
971     */
972    public CSVFormat withRecordSeparator(final char recordSeparator) {
973        return withRecordSeparator(String.valueOf(recordSeparator));
974    }
975
976    /**
977     * Sets the record separator of the format to the specified String.
978     *
979     * <p><strong>Note:</strong> This setting is only used during printing and does not affect parsing. Parsing
980     * currently only works for inputs with '\n', '\r' and "\r\n"</p>
981     *
982     * @param recordSeparator
983     *            the record separator to use for output.
984     *
     * @return A new CSVFormat that is equal to this but with the specified output record separator
986     * @throws IllegalArgumentException
987     *              if recordSeparator is none of CR, LF or CRLF
988     */
989    public CSVFormat withRecordSeparator(final String recordSeparator) {
990        return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
991                ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord,
992                allowMissingColumnNames);
993    }
994
995    /**
996     * Sets whether to skip the header record.
997     *
998     * @param skipHeaderRecord
999     *            whether to skip the header record.
1000     *
     * @return A new CSVFormat that is equal to this but with the specified skipHeaderRecord setting.
1002     * @see #withHeader(String...)
1003     */
1004    public CSVFormat withSkipHeaderRecord(final boolean skipHeaderRecord) {
1005        return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
1006                ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord,
1007                allowMissingColumnNames);
1008    }
1009}