1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * https://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19
20 package org.apache.commons.csv;
21
22 import static org.apache.commons.io.IOUtils.EOF;
23
24 import java.io.File;
25 import java.io.IOException;
26 import java.io.InputStream;
27 import java.io.OutputStream;
28 import java.io.Reader;
29 import java.io.Serializable;
30 import java.io.StringWriter;
31 import java.io.Writer;
32 import java.nio.charset.Charset;
33 import java.nio.file.Files;
34 import java.nio.file.Path;
35 import java.sql.ResultSet;
36 import java.sql.ResultSetMetaData;
37 import java.sql.SQLException;
38 import java.util.Arrays;
39 import java.util.HashSet;
40 import java.util.Objects;
41 import java.util.Set;
42 import java.util.function.Supplier;
43
44 import org.apache.commons.codec.binary.Base64OutputStream;
45 import org.apache.commons.io.IOUtils;
46 import org.apache.commons.io.function.IOStream;
47 import org.apache.commons.io.function.Uncheck;
48 import org.apache.commons.io.output.AppendableOutputStream;
49
50 /**
51 * Specifies the format of a CSV file for parsing and writing.
52 *
53 * <h2>Using predefined formats</h2>
54 *
55 * <p>
56 * You can use one of the predefined formats:
57 * </p>
58 *
59 * <ul>
60 * <li>{@link #DEFAULT}</li>
61 * <li>{@link #EXCEL}</li>
62 * <li>{@link #INFORMIX_UNLOAD}</li>
63 * <li>{@link #INFORMIX_UNLOAD_CSV}</li>
64 * <li>{@link #MONGODB_CSV}</li>
65 * <li>{@link #MONGODB_TSV}</li>
66 * <li>{@link #MYSQL}</li>
67 * <li>{@link #ORACLE}</li>
68 * <li>{@link #POSTGRESQL_CSV}</li>
69 * <li>{@link #POSTGRESQL_TEXT}</li>
70 * <li>{@link #RFC4180}</li>
71 * <li>{@link #TDF}</li>
72 * </ul>
73 *
74 * <p>
75 * For example:
76 * </p>
77 *
78 * <pre>
79 * CSVParser parser = CSVFormat.EXCEL.parse(reader);
80 * </pre>
81 *
82 * <p>
83 * The {@link CSVParser} provides static methods to parse other input types, for example:
84 * </p>
85 *
86 * <pre>
87 * CSVParser parser = CSVParser.parse(file, StandardCharsets.US_ASCII, CSVFormat.EXCEL);
88 * </pre>
89 *
90 * <h2>Defining formats</h2>
91 *
92 * <p>
93 * You can extend a format by calling the {@code set} methods. For example:
94 * </p>
95 *
96 * <pre>{@code
97 * CSVFormat.EXCEL.builder().setNullString("N/A").setIgnoreSurroundingSpaces(true).get();
98 * }</pre>
99 *
100 * <h2>Defining column names</h2>
101 *
102 * <p>
103 * To define the column names you want to use to access records, write:
104 * </p>
105 *
106 * <pre>{@code
107 * CSVFormat.EXCEL.builder().setHeader("Col1", "Col2", "Col3").get();
108 * }</pre>
109 *
110 * <p>
111 * Calling {@link Builder#setHeader(String...)} lets you use the given names to address values in a {@link CSVRecord}, and assumes that your CSV source does not
112 * contain a first record that also defines column names.
113 *
114 * If it does, then you are overriding this metadata with your names and you should skip the first record by calling
115 * {@link Builder#setSkipHeaderRecord(boolean)} with {@code true}.
116 * </p>
117 *
118 * <h2>Parsing</h2>
119 *
120 * <p>
 * You can use a format directly to parse a reader. For example, to parse an Excel file with a column header, write:
122 * </p>
123 *
124 * <pre>{@code
125 * Reader in = ...;
126 * CSVFormat.EXCEL.builder().setHeader("Col1", "Col2", "Col3").get().parse(in);
127 * }</pre>
128 *
129 * <p>
130 * For other input types, like resources, files, and URLs, use the static methods on {@link CSVParser}.
131 * </p>
132 *
133 * <h2>Referencing columns safely</h2>
134 *
135 * <p>
136 * If your source contains a header record, you can simplify your code and safely reference columns, by using {@link Builder#setHeader(String...)} with no
137 * arguments:
138 * </p>
139 *
140 * <pre>
141 * CSVFormat.EXCEL.builder().setHeader().get();
142 * </pre>
143 *
144 * <p>
145 * This causes the parser to read the first record and use its values as column names.
146 *
 * Then, call one of the {@link CSVRecord} get methods that take a String column name argument:
148 * </p>
149 *
150 * <pre>{@code
151 * String value = record.get("Col1");
152 * }</pre>
153 *
154 * <p>
155 * This makes your code impervious to changes in column order in the CSV file.
156 * </p>
157 *
158 * <h2>Serialization</h2>
159 * <p>
160 * This class implements the {@link Serializable} interface with the following caveats:
161 * </p>
162 * <ul>
163 * <li>This class will no longer implement Serializable in 2.0.</li>
164 * <li>Serialization is not supported from one version to the next.</li>
165 * </ul>
166 * <p>
167 * The {@code serialVersionUID} values are:
168 * </p>
169 * <ul>
170 * <li>Version 1.10.0: {@code 2L}</li>
171 * <li>Version 1.9.0 through 1.0: {@code 1L}</li>
172 * </ul>
173 *
174 * <h2>Notes</h2>
175 * <p>
176 * This class is immutable.
177 * </p>
178 * <p>
179 * Not all settings are used for both parsing and writing.
180 * </p>
181 */
182 public final class CSVFormat implements Serializable {
183
184 /**
185 * Builds CSVFormat instances.
186 *
187 * @since 1.9.0
188 */
189 public static class Builder implements Supplier<CSVFormat> {
190
191 /**
192 * Creates a new default builder, as for {@link #RFC4180} but allowing empty lines.
193 *
194 * <p>
195 * The {@link Builder} settings are:
196 * </p>
197 * <ul>
198 * <li>{@link Builder#setDelimiter(char) setDelimiter}{@code (',')}</li>
199 * <li>{@link Builder#setQuote(char) setQuote}{@code ('"')}</li>
200 * <li>{@link Builder#setRecordSeparator(String) setRecordSeparator}{@code ("\r\n")}</li>
201 * <li>{@link Builder#setIgnoreEmptyLines(boolean) setIgnoreEmptyLines}{@code (true)}</li>
202 * <li>{@link Builder#setDuplicateHeaderMode(DuplicateHeaderMode) setDuplicateHeaderMode}{@code (DuplicateHeaderMode.ALLOW_ALL)}</li>
203 * <li>All other values take their Java defaults, {@code false} for booleans, {@code null} for object references.</li>
204 * </ul>
205 *
206 * @see Predefined#Default
207 * @see DuplicateHeaderMode#ALLOW_ALL
208 *
209 * @return a copy of the builder
210 */
211 public static Builder create() {
212 // @formatter:off
213 return new Builder()
214 .setDelimiter(Constants.COMMA)
215 .setQuote(Constants.DOUBLE_QUOTE_CHAR)
216 .setRecordSeparator(Constants.CRLF)
217 .setIgnoreEmptyLines(true)
218 .setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL);
219 // @formatter:on
220 }
221
222 /**
223 * Creates a new builder from the given format.
224 *
225 * @param csvFormat the source format.
226 * @return a new builder.
227 */
228 public static Builder create(final CSVFormat csvFormat) {
229 return new Builder(csvFormat);
230 }
231
232 private boolean allowMissingColumnNames;
233
234 private boolean autoFlush;
235
236 private Character commentMarker;
237
238 private String delimiter;
239
240 private DuplicateHeaderMode duplicateHeaderMode;
241
242 private Character escapeCharacter;
243
244 private String[] headerComments;
245
246 private String[] headers;
247
248 private boolean ignoreEmptyLines;
249
250 private boolean ignoreHeaderCase;
251
252 private boolean ignoreSurroundingSpaces;
253
254 private String nullString;
255
256 private Character quoteCharacter;
257
258 private String quotedNullString;
259
260 private QuoteMode quoteMode;
261
262 private String recordSeparator;
263
264 private boolean skipHeaderRecord;
265
266 private boolean lenientEof;
267
268 private boolean trailingData;
269
270 private boolean trailingDelimiter;
271
272 private boolean trim;
273
274 /** The maximum number of rows to process, excluding the header row. */
275 private long maxRows;
276
/** Creates a builder whose fields all keep their Java default values. */
private Builder() {
    // Nothing to initialize.
}
280
/**
 * Creates a builder initialized with every setting of the given format.
 * <p>
 * The header and header comment arrays are taken over by reference, not copied.
 * </p>
 *
 * @param csvFormat the format to read the settings from.
 */
private Builder(final CSVFormat csvFormat) {
    // Characters and strings that shape the syntax.
    delimiter = csvFormat.delimiter;
    quoteCharacter = csvFormat.quoteCharacter;
    quoteMode = csvFormat.quoteMode;
    escapeCharacter = csvFormat.escapeCharacter;
    commentMarker = csvFormat.commentMarker;
    recordSeparator = csvFormat.recordSeparator;
    nullString = csvFormat.nullString;
    quotedNullString = csvFormat.quotedNullString;
    // Header handling.
    headers = csvFormat.headers;
    headerComments = csvFormat.headerComments;
    duplicateHeaderMode = csvFormat.duplicateHeaderMode;
    allowMissingColumnNames = csvFormat.allowMissingColumnNames;
    ignoreHeaderCase = csvFormat.ignoreHeaderCase;
    skipHeaderRecord = csvFormat.skipHeaderRecord;
    // Remaining switches and limits.
    autoFlush = csvFormat.autoFlush;
    ignoreEmptyLines = csvFormat.ignoreEmptyLines;
    ignoreSurroundingSpaces = csvFormat.ignoreSurroundingSpaces;
    lenientEof = csvFormat.lenientEof;
    maxRows = csvFormat.maxRows;
    trailingData = csvFormat.trailingData;
    trailingDelimiter = csvFormat.trailingDelimiter;
    trim = csvFormat.trim;
}
305
306 /**
307 * Builds a new CSVFormat instance.
308 *
309 * @return a new CSVFormat instance.
310 * @deprecated Use {@link #get()}.
311 */
312 @Deprecated
313 public CSVFormat build() {
314 return get();
315 }
316
317 /**
318 * Builds a new CSVFormat instance.
319 *
320 * @return a new CSVFormat instance.
321 * @since 1.13.0
322 */
323 @Override
324 public CSVFormat get() {
325 return new CSVFormat(this);
326 }
327
328 /**
329 * Sets the duplicate header names behavior, true to allow, false to disallow.
330 *
331 * @param allowDuplicateHeaderNames the duplicate header names behavior, true to allow, false to disallow.
332 * @return This instance.
333 * @deprecated Use {@link #setDuplicateHeaderMode(DuplicateHeaderMode)}.
334 */
335 @Deprecated
336 public Builder setAllowDuplicateHeaderNames(final boolean allowDuplicateHeaderNames) {
337 setDuplicateHeaderMode(allowDuplicateHeaderNames ? DuplicateHeaderMode.ALLOW_ALL : DuplicateHeaderMode.ALLOW_EMPTY);
338 return this;
339 }
340
341 /**
342 * Sets the parser missing column names behavior, {@code true} to allow missing column names in the header line, {@code false} to cause an
343 * {@link IllegalArgumentException} to be thrown.
344 *
345 * @param allowMissingColumnNames the missing column names behavior, {@code true} to allow missing column names in the header line, {@code false} to
346 * cause an {@link IllegalArgumentException} to be thrown.
347 * @return This instance.
348 */
349 public Builder setAllowMissingColumnNames(final boolean allowMissingColumnNames) {
350 this.allowMissingColumnNames = allowMissingColumnNames;
351 return this;
352 }
353
354 /**
355 * Sets whether to flush on close.
356 *
357 * @param autoFlush whether to flush on close.
358 * @return This instance.
359 */
360 public Builder setAutoFlush(final boolean autoFlush) {
361 this.autoFlush = autoFlush;
362 return this;
363 }
364
365 /**
366 * Sets the comment marker character, use {@code null} to disable comments.
367 * <p>
368 * The comment start character is only recognized at the start of a line.
369 * </p>
370 * <p>
371 * Comments are printed first, before headers.
372 * </p>
373 * <p>
374 * Use {@link #setCommentMarker(char)} or {@link #setCommentMarker(Character)} to set the comment marker written at the start of each comment line.
375 * </p>
376 * <p>
377 * If the comment marker is not set, then the header comments are ignored.
378 * </p>
379 * <p>
380 * For example:
381 * </p>
382 *
383 * <pre>
384 * builder.setCommentMarker('#').setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0));
385 * </pre>
386 * <p>
387 * writes:
388 * </p>
389 *
390 * <pre>
391 * # Generated by Apache Commons CSV.
392 * # 1970-01-01T00:00:00Z
393 * </pre>
394 *
395 * @param commentMarker the comment start marker, use {@code null} to disable.
396 * @return This instance.
397 * @throws IllegalArgumentException thrown if the specified character is a line break
398 */
399 public Builder setCommentMarker(final char commentMarker) {
400 setCommentMarker(Character.valueOf(commentMarker));
401 return this;
402 }
403
404 /**
405 * Sets the comment marker character, use {@code null} to disable comments.
406 * <p>
407 * The comment start character is only recognized at the start of a line.
408 * </p>
409 * <p>
410 * Comments are printed first, before headers.
411 * </p>
412 * <p>
413 * Use {@link #setCommentMarker(char)} or {@link #setCommentMarker(Character)} to set the comment marker written at the start of each comment line.
414 * </p>
415 * <p>
416 * If the comment marker is not set, then the header comments are ignored.
417 * </p>
418 * <p>
419 * For example:
420 * </p>
421 *
422 * <pre>
423 * builder.setCommentMarker('#').setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0));
424 * </pre>
425 * <p>
426 * writes:
427 * </p>
428 *
429 * <pre>
430 * # Generated by Apache Commons CSV.
431 * # 1970-01-01T00:00:00Z
432 * </pre>
433 *
434 * @param commentMarker the comment start marker, use {@code null} to disable.
435 * @return This instance.
436 * @throws IllegalArgumentException thrown if the specified character is a line break
437 */
438 public Builder setCommentMarker(final Character commentMarker) {
439 if (isLineBreak(commentMarker)) {
440 throw new IllegalArgumentException("The comment start marker character cannot be a line break");
441 }
442 this.commentMarker = commentMarker;
443 return this;
444 }
445
446 /**
447 * Sets the delimiter character.
448 *
449 * @param delimiter the delimiter character.
450 * @return This instance.
451 */
452 public Builder setDelimiter(final char delimiter) {
453 return setDelimiter(String.valueOf(delimiter));
454 }
455
456 /**
457 * Sets the delimiter character.
458 *
459 * @param delimiter the delimiter character.
460 * @return This instance.
461 */
462 public Builder setDelimiter(final String delimiter) {
463 if (containsLineBreak(delimiter)) {
464 throw new IllegalArgumentException("The delimiter cannot be a line break");
465 }
466 if (delimiter.isEmpty()) {
467 throw new IllegalArgumentException("The delimiter cannot be empty");
468 }
469 this.delimiter = delimiter;
470 return this;
471 }
472
473 /**
474 * Sets the duplicate header names behavior.
475 *
476 * @param duplicateHeaderMode the duplicate header names behavior
477 * @return This instance.
478 * @since 1.10.0
479 */
480 public Builder setDuplicateHeaderMode(final DuplicateHeaderMode duplicateHeaderMode) {
481 this.duplicateHeaderMode = Objects.requireNonNull(duplicateHeaderMode, "duplicateHeaderMode");
482 return this;
483 }
484
485 /**
486 * Sets the escape character.
487 *
488 * @param escapeCharacter the escape character.
489 * @return This instance.
490 * @throws IllegalArgumentException thrown if the specified character is a line break
491 */
492 public Builder setEscape(final char escapeCharacter) {
493 setEscape(Character.valueOf(escapeCharacter));
494 return this;
495 }
496
497 /**
498 * Sets the escape character.
499 *
500 * @param escapeCharacter the escape character.
501 * @return This instance.
502 * @throws IllegalArgumentException thrown if the specified character is a line break
503 */
504 public Builder setEscape(final Character escapeCharacter) {
505 if (isLineBreak(escapeCharacter)) {
506 throw new IllegalArgumentException("The escape character cannot be a line break");
507 }
508 this.escapeCharacter = escapeCharacter;
509 return this;
510 }
511
512 /**
513 * Sets the header defined by the given {@link Enum} class.
514 *
515 * <p>
516 * Example:
517 * </p>
518 *
519 * <pre>
520 * public enum HeaderEnum {
521 * Name, Email, Phone
522 * }
523 *
524 * Builder builder = builder.setHeader(HeaderEnum.class);
525 * </pre>
526 * <p>
527 * The header is also used by the {@link CSVPrinter}.
528 * </p>
529 *
530 * @param headerEnum the enum defining the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise.
531 * @return This instance.
532 */
533 public Builder setHeader(final Class<? extends Enum<?>> headerEnum) {
534 String[] header = null;
535 if (headerEnum != null) {
536 final Enum<?>[] enumValues = headerEnum.getEnumConstants();
537 header = new String[enumValues.length];
538 Arrays.setAll(header, i -> enumValues[i].name());
539 }
540 return setHeader(header);
541 }
542
543 /**
544 * Sets the header from the result set metadata. The header can be parsed automatically from the input file with:
545 *
546 * <pre>
547 * builder.setHeader();
548 * </pre>
549 *
550 * or specified manually with:
551 *
552 * <pre>
553 * builder.setHeader(resultSet);
554 * </pre>
555 * <p>
556 * The header is also used by the {@link CSVPrinter}.
557 * </p>
558 *
559 * @param resultSet the resultSet for the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise.
560 * @return This instance.
561 * @throws SQLException SQLException if a database access error occurs or this method is called on a closed result set.
562 */
563 public Builder setHeader(final ResultSet resultSet) throws SQLException {
564 return setHeader(resultSet != null ? resultSet.getMetaData() : null);
565 }
566
567 /**
568 * Sets the header from the result set metadata. The header can be parsed automatically from the input file with:
569 *
570 * <pre>
571 * builder.setHeader();
572 * </pre>
573 *
574 * or specified manually with:
575 *
576 * <pre>
577 * builder.setHeader(resultSetMetaData);
578 * </pre>
579 * <p>
580 * The header is also used by the {@link CSVPrinter}.
581 * </p>
582 *
583 * @param resultSetMetaData the metaData for the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise.
584 * @return This instance.
585 * @throws SQLException SQLException if a database access error occurs or this method is called on a closed result set.
586 */
587 public Builder setHeader(final ResultSetMetaData resultSetMetaData) throws SQLException {
588 String[] labels = null;
589 if (resultSetMetaData != null) {
590 final int columnCount = resultSetMetaData.getColumnCount();
591 labels = new String[columnCount];
592 for (int i = 0; i < columnCount; i++) {
593 labels[i] = resultSetMetaData.getColumnLabel(i + 1);
594 }
595 }
596 return setHeader(labels);
597 }
598
599 /**
600 * Sets the header to the given values. The header can be parsed automatically from the input file with:
601 *
602 * <pre>
603 * builder.setHeader();
604 * </pre>
605 *
606 * or specified manually with:
607 *
608 * <pre>{@code
609 * builder.setHeader("name", "email", "phone");
610 * }</pre>
611 * <p>
612 * The header is also used by the {@link CSVPrinter}.
613 * </p>
614 * <p>
615 * This method keeps a copy of the input array.
616 * </p>
617 * @param header the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise.
618 * @return This instance.
619 */
620 public Builder setHeader(final String... header) {
621 this.headers = CSVFormat.clone(header);
622 return this;
623 }
624
625 /**
626 * Sets the header comments to write before the CSV data.
627 * <p>
628 * This setting is ignored by the parser.
629 * </p>
630 * <p>
631 * Comments are printed first, before headers.
632 * </p>
633 * <p>
634 * Use {@link #setCommentMarker(char)} or {@link #setCommentMarker(Character)} to set the comment marker written at the start of each comment line.
635 * </p>
636 * <p>
637 * If the comment marker is not set, then the header comments are ignored.
638 * </p>
639 * <p>
640 * For example:
641 * </p>
642 *
643 * <pre>
644 * builder.setCommentMarker('#').setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0));
645 * </pre>
646 * <p>
647 * writes:
648 * </p>
649 *
650 * <pre>
651 * # Generated by Apache Commons CSV.
652 * # 1970-01-01T00:00:00Z
653 * </pre>
654 * <p>
655 * This method keeps a copy of the input array.
656 * </p>
657 *
658 * @param headerComments the headerComments which will be printed by the Printer before the CSV data.
659 * @return This instance.
660 */
661 public Builder setHeaderComments(final Object... headerComments) {
662 this.headerComments = CSVFormat.clone(toStringArray(headerComments));
663 return this;
664 }
665
666 /**
667 * Sets the header comments to write before the CSV data.
668 * <p>
669 * This setting is ignored by the parser.
670 * </p>
671 * <p>
672 * Comments are printed first, before headers.
673 * </p>
674 * <p>
675 * Use {@link #setCommentMarker(char)} or {@link #setCommentMarker(Character)} to set the comment marker written at the start of each comment line.
676 * </p>
677 * <p>
678 * If the comment marker is not set, then the header comments are ignored.
679 * </p>
680 * <p>
681 * For example:
682 * </p>
683 *
684 * <pre>
685 * builder.setCommentMarker('#').setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0).toString());
686 * </pre>
687 * <p>
688 * writes:
689 * </p>
690 *
691 * <pre>
692 * # Generated by Apache Commons CSV.
693 * # 1970-01-01T00:00:00Z
694 * </pre>
695 * <p>
696 * This method keeps a copy of the input array.
697 * </p>
698 * @param headerComments the headerComments which will be printed by the Printer before the CSV data.
699 * @return This instance.
700 */
701 public Builder setHeaderComments(final String... headerComments) {
702 this.headerComments = CSVFormat.clone(headerComments);
703 return this;
704 }
705
706 /**
707 * Sets the empty line skipping behavior, {@code true} to ignore the empty lines between the records, {@code false} to translate empty lines to empty
708 * records.
709 *
710 * @param ignoreEmptyLines the empty line skipping behavior, {@code true} to ignore the empty lines between the records, {@code false} to translate
711 * empty lines to empty records.
712 * @return This instance.
713 */
714 public Builder setIgnoreEmptyLines(final boolean ignoreEmptyLines) {
715 this.ignoreEmptyLines = ignoreEmptyLines;
716 return this;
717 }
718
719 /**
720 * Sets the parser case mapping behavior, {@code true} to access name/values, {@code false} to leave the mapping as is.
721 *
722 * @param ignoreHeaderCase the case mapping behavior, {@code true} to access name/values, {@code false} to leave the mapping as is.
723 * @return This instance.
724 */
725 public Builder setIgnoreHeaderCase(final boolean ignoreHeaderCase) {
726 this.ignoreHeaderCase = ignoreHeaderCase;
727 return this;
728 }
729
730 /**
731 * Sets the parser trimming behavior, {@code true} to remove the surrounding spaces, {@code false} to leave the spaces as is.
732 *
733 * @param ignoreSurroundingSpaces the parser trimming behavior, {@code true} to remove the surrounding spaces, {@code false} to leave the spaces as is.
734 * @return This instance.
735 */
736 public Builder setIgnoreSurroundingSpaces(final boolean ignoreSurroundingSpaces) {
737 this.ignoreSurroundingSpaces = ignoreSurroundingSpaces;
738 return this;
739 }
740
741 /**
742 * Sets whether reading end-of-file is allowed even when input is malformed, helps Excel compatibility.
743 *
744 * @param lenientEof whether reading end-of-file is allowed even when input is malformed, helps Excel compatibility.
745 * @return This instance.
746 * @since 1.11.0
747 */
748 public Builder setLenientEof(final boolean lenientEof) {
749 this.lenientEof = lenientEof;
750 return this;
751 }
752
753 /**
754 * Sets the maximum number of rows to process, excluding the header row.
755 * <p>
756 * Values less than or equal to 0 mean no limit.
757 * </p>
758 *
759 * @param maxRows the maximum number of rows to process, excluding the header row.
760 * @return This instance.
761 * @since 1.14.0
762 */
763 public Builder setMaxRows(final long maxRows) {
764 this.maxRows = maxRows;
765 return this;
766 }
767
768 /**
769 * Sets the String to convert to and from {@code null}. No substitution occurs if {@code null}.
770 *
771 * <ul>
772 * <li><strong>Reading:</strong> Converts strings equal to the given {@code nullString} to {@code null} when reading records.</li>
773 * <li><strong>Writing:</strong> Writes {@code null} as the given {@code nullString} when writing records.</li>
774 * </ul>
775 *
776 * @param nullString the String to convert to and from {@code null}. No substitution occurs if {@code null}.
777 * @return This instance.
778 */
779 public Builder setNullString(final String nullString) {
780 this.nullString = nullString;
781 this.quotedNullString = quoteCharacter + nullString + quoteCharacter;
782 return this;
783 }
784
785 /**
786 * Sets the quote character.
787 *
788 * @param quoteCharacter the quote character.
789 * @return This instance.
790 */
791 public Builder setQuote(final char quoteCharacter) {
792 setQuote(Character.valueOf(quoteCharacter));
793 return this;
794 }
795
796 /**
797 * Sets the quote character, use {@code null} to disable.
798 *
799 * @param quoteCharacter the quote character, use {@code null} to disable.
800 * @return This instance.
801 */
802 public Builder setQuote(final Character quoteCharacter) {
803 if (isLineBreak(quoteCharacter)) {
804 throw new IllegalArgumentException("The quoteCharacter cannot be a line break");
805 }
806 this.quoteCharacter = quoteCharacter;
807 return this;
808 }
809
810 /**
811 * Sets the quote policy to use for output.
812 *
813 * @param quoteMode the quote policy to use for output.
814 * @return This instance.
815 */
816 public Builder setQuoteMode(final QuoteMode quoteMode) {
817 this.quoteMode = quoteMode;
818 return this;
819 }
820
821 /**
822 * Sets the record separator to use for output.
823 *
824 * <p>
825 * <strong>Note:</strong> This setting is only used during printing and does not affect parsing. Parsing currently only works for inputs with '\n', '\r'
826 * and "\r\n"
827 * </p>
828 *
829 * @param recordSeparator the record separator to use for output.
830 * @return This instance.
831 */
832 public Builder setRecordSeparator(final char recordSeparator) {
833 this.recordSeparator = String.valueOf(recordSeparator);
834 return this;
835 }
836
837 /**
838 * Sets the record separator to use for output.
839 *
840 * <p>
841 * <strong>Note:</strong> This setting is only used during printing and does not affect parsing. Parsing currently only works for inputs with '\n', '\r'
842 * and "\r\n"
843 * </p>
844 *
845 * @param recordSeparator the record separator to use for output.
846 * @return This instance.
847 */
848 public Builder setRecordSeparator(final String recordSeparator) {
849 this.recordSeparator = recordSeparator;
850 return this;
851 }
852
853 /**
854 * Sets whether to skip the header record.
855 *
856 * @param skipHeaderRecord whether to skip the header record.
857 * @return This instance.
858 */
859 public Builder setSkipHeaderRecord(final boolean skipHeaderRecord) {
860 this.skipHeaderRecord = skipHeaderRecord;
861 return this;
862 }
863
864 /**
865 * Sets whether reading trailing data is allowed in records, helps Excel compatibility.
866 *
867 * @param trailingData whether reading trailing data is allowed in records, helps Excel compatibility.
868 * @return This instance.
869 * @since 1.11.0
870 */
871 public Builder setTrailingData(final boolean trailingData) {
872 this.trailingData = trailingData;
873 return this;
874 }
875
876 /**
877 * Sets whether to add a trailing delimiter.
878 *
879 * @param trailingDelimiter whether to add a trailing delimiter.
880 * @return This instance.
881 */
882 public Builder setTrailingDelimiter(final boolean trailingDelimiter) {
883 this.trailingDelimiter = trailingDelimiter;
884 return this;
885 }
886
887
888 /**
889 * Sets whether to trim leading and trailing blanks.
890 *
891 * @param trim whether to trim leading and trailing blanks.
892 * @return This instance.
893 */
894 public Builder setTrim(final boolean trim) {
895 this.trim = trim;
896 return this;
897 }
898 }
899
900 /**
901 * Predefines formats.
902 *
903 * @since 1.2
904 */
905 public enum Predefined {
906
907 /**
908 * The DEFAULT predefined format.
909 *
910 * @see CSVFormat#DEFAULT
911 */
912 Default(DEFAULT),
913
914 /**
915 * The EXCEL predefined format.
916 *
917 * @see CSVFormat#EXCEL
918 */
919 Excel(EXCEL),
920
921 /**
922 * The INFORMIX_UNLOAD predefined format.
923 *
924 * @see CSVFormat#INFORMIX_UNLOAD
925 * @since 1.3
926 */
927 InformixUnload(INFORMIX_UNLOAD),
928
929 /**
930 * The INFORMIX_UNLOAD_CSV predefined format.
931 *
932 * @see CSVFormat#INFORMIX_UNLOAD_CSV
933 * @since 1.3
934 */
935 InformixUnloadCsv(INFORMIX_UNLOAD_CSV),
936
937 /**
938 * The MONGODB_CSV predefined format.
939 *
940 * @see CSVFormat#MONGODB_CSV
941 * @since 1.7
942 */
943 MongoDBCsv(MONGODB_CSV),
944
945 /**
946 * The MONGODB_TSV predefined format.
947 *
948 * @see CSVFormat#MONGODB_TSV
949 * @since 1.7
950 */
951 MongoDBTsv(MONGODB_TSV),
952
953 /**
954 * The MYSQL predefined format.
955 *
956 * @see CSVFormat#MYSQL
957 */
958 MySQL(MYSQL),
959
960 /**
961 * The ORACLE predefined format.
962 *
963 * @see CSVFormat#ORACLE
964 */
965 Oracle(ORACLE),
966
967 /**
968 * The POSTGRESQL_CSV predefined format.
969 *
970 * @see CSVFormat#POSTGRESQL_CSV
971 * @since 1.5
972 */
973 PostgreSQLCsv(POSTGRESQL_CSV),
974
975 /**
976 * The POSTGRESQL_TEXT predefined format.
977 *
978 * @see CSVFormat#POSTGRESQL_TEXT
979 */
980 PostgreSQLText(POSTGRESQL_TEXT),
981
982 /**
983 * The RFC4180 predefined format.
984 *
985 * @see CSVFormat#RFC4180
986 */
987 RFC4180(CSVFormat.RFC4180),
988
989 /**
990 * The TDF predefined format.
991 *
992 * @see CSVFormat#TDF
993 */
994 TDF(CSVFormat.TDF);
995
996 private final CSVFormat format;
997
998 Predefined(final CSVFormat format) {
999 this.format = format;
1000 }
1001
1002 /**
1003 * Gets the format.
1004 *
1005 * @return the format.
1006 */
1007 public CSVFormat getFormat() {
1008 return format;
1009 }
1010 }
1011
1012 /**
1013 * Standard Comma Separated Value format, as for {@link #RFC4180} but allowing empty lines.
1014 *
1015 * <p>
1016 * The {@link Builder} settings are:
1017 * </p>
1018 * <ul>
1019 * <li>{@link Builder#setDelimiter(char) setDelimiter}{@code (',')}</li>
1020 * <li>{@link Builder#setQuote(char) setQuote}{@code ('"')}</li>
1021 * <li>{@link Builder#setRecordSeparator(String) setRecordSeparator}{@code ("\r\n")}</li>
1022 * <li>{@link Builder#setIgnoreEmptyLines(boolean) setIgnoreEmptyLines}{@code (true)}</li>
1023 * <li>{@link Builder#setDuplicateHeaderMode(DuplicateHeaderMode) setDuplicateHeaderMode}{@code (DuplicateHeaderMode.ALLOW_ALL)}</li>
1024 * </ul>
1025 *
1026 * @see Predefined#Default
1027 * @see DuplicateHeaderMode#ALLOW_ALL
1028 */
1029 public static final CSVFormat DEFAULT = new CSVFormat(Builder.create());
1030
/**
 * <a href="https://support.microsoft.com/en-us/office/import-or-export-text-txt-or-csv-files-5250ac4c-663c-47ce-937b-339e391393ba">Microsoft Excel</a> file
 * format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is locale-dependent, so it might be necessary to
 * customize this format to accommodate your regional settings.
 *
 * <p>
 * For example, for parsing or generating a CSV file on a French system the following format will be used:
 * </p>
 *
 * <pre>
 * CSVFormat format = CSVFormat.EXCEL.builder().setDelimiter(';').get();
 * </pre>
 *
 * <p>
 * The {@link Builder} settings are the {@link #DEFAULT} <em>with</em>:
 * </p>
 * <ul>
 * <li>{@link Builder#setDelimiter(char) setDelimiter}{@code (',')}</li>
 * <li>{@link Builder#setQuote(char) setQuote}{@code ('"')}</li>
 * <li>{@link Builder#setRecordSeparator(String) setRecordSeparator}{@code ("\r\n")}</li>
 * <li>{@link Builder#setDuplicateHeaderMode(DuplicateHeaderMode) setDuplicateHeaderMode}{@code (DuplicateHeaderMode.ALLOW_ALL)}</li>
 * <li>{@link Builder#setIgnoreEmptyLines(boolean) setIgnoreEmptyLines}{@code (false)}</li>
 * <li>{@link Builder#setAllowMissingColumnNames(boolean) setAllowMissingColumnNames}{@code (true)}</li>
 * <li>{@link Builder#setTrailingData(boolean) setTrailingData}{@code (true)}</li>
 * <li>{@link Builder#setLenientEof(boolean) setLenientEof}{@code (true)}</li>
 * </ul>
 * <p>
 * Note: This is currently like {@link #RFC4180} plus {@link Builder#setAllowMissingColumnNames(boolean) Builder#setAllowMissingColumnNames(true)},
 * {@link Builder#setTrailingData(boolean) Builder#setTrailingData(true)} and {@link Builder#setLenientEof(boolean) Builder#setLenientEof(true)}.
 * </p>
 *
 * @see Predefined#Excel
 * @see DuplicateHeaderMode#ALLOW_ALL
 * @see <a href="https://support.microsoft.com/en-us/office/import-or-export-text-txt-or-csv-files-5250ac4c-663c-47ce-937b-339e391393ba">Microsoft Excel
 *      </a>
 */
// @formatter:off
public static final CSVFormat EXCEL = DEFAULT.builder()
        .setIgnoreEmptyLines(false)
        .setAllowMissingColumnNames(true)
        .setTrailingData(true)
        .setLenientEof(true)
        .get();
// @formatter:on
1075
/**
 * Default <a href="https://www.ibm.com/docs/en/informix-servers/14.10?topic=statements-unload-statement">Informix CSV UNLOAD</a>
 * format used by the {@code UNLOAD TO file_name} operation.
 *
 * <p>
 * This is a pipe-delimited format with an LF character as the line separator. Special characters are escaped with {@code '\'}.
 * </p>
 *
 * <p>
 * The {@link Builder} settings are the {@link #DEFAULT} <em>with</em>:
 * </p>
 * <ul>
 * <li>{@link Builder#setDelimiter(char) setDelimiter}{@code ('|')}</li>
 * <li>{@link Builder#setEscape(char) setEscape}{@code ('\\')}</li>
 * <li>{@link Builder#setQuote(char) setQuote}{@code ('\"')}</li>
 * <li>{@link Builder#setRecordSeparator(char) setRecordSeparator}{@code ('\n')}</li>
 * </ul>
 *
 * @see Predefined#InformixUnload
 * @see <a href="https://www.ibm.com/docs/en/informix-servers/14.10?topic=statements-unload-statement">Informix CSV UNLOAD</a>
 * @since 1.3
 */
// @formatter:off
public static final CSVFormat INFORMIX_UNLOAD = DEFAULT.builder()
        .setDelimiter(Constants.PIPE)
        .setEscape(Constants.BACKSLASH)
        .setQuote(Constants.DOUBLE_QUOTE_CHAR)
        .setRecordSeparator(Constants.LF)
        .get();
// @formatter:on
1107
/**
 * Default <a href="https://www.ibm.com/docs/en/informix-servers/14.10?topic=statements-unload-statement">Informix CSV UNLOAD</a>
 * format used by the {@code UNLOAD TO file_name} operation (escaping is disabled.)
 *
 * <p>
 * This is a comma-delimited format with an LF character as the line separator. Unlike {@link #INFORMIX_UNLOAD}, no escape character is configured.
 * </p>
 *
 * <p>
 * The {@link Builder} settings are the {@link #DEFAULT} <em>with</em>:
 * </p>
 * <ul>
 * <li>{@link Builder#setDelimiter(char) setDelimiter}{@code (',')}</li>
 * <li>{@link Builder#setQuote(char) setQuote}{@code ('\"')}</li>
 * <li>{@link Builder#setRecordSeparator(char) setRecordSeparator}{@code ('\n')}</li>
 * </ul>
 *
 * @see Predefined#InformixUnloadCsv
 * @see <a href= "http://www.ibm.com/support/knowledgecenter/SSBJG3_2.5.0/com.ibm.gen_busug.doc/c_fgl_InOutSql_UNLOAD.htm">
 *      http://www.ibm.com/support/knowledgecenter/SSBJG3_2.5.0/com.ibm.gen_busug.doc/c_fgl_InOutSql_UNLOAD.htm</a>
 * @since 1.3
 */
// @formatter:off
public static final CSVFormat INFORMIX_UNLOAD_CSV = DEFAULT.builder()
        .setDelimiter(Constants.COMMA)
        .setQuote(Constants.DOUBLE_QUOTE_CHAR)
        .setRecordSeparator(Constants.LF)
        .get();
// @formatter:on
1138
/**
 * Default MongoDB CSV format used by the {@code mongoexport} operation.
 * <p>
 * <strong>Parsing is not supported yet.</strong>
 * </p>
 *
 * <p>
 * This is a comma-delimited format. Values are double quoted only if needed and special characters are escaped with {@code '"'}. A header line with field
 * names is expected.
 * </p>
 * <p>
 * As of 2024-04-05, the MongoDB documentation for {@code mongoimport} states:
 * </p>
 * <blockquote>The csv parser accepts that data that complies with RFC <a href="https://tools.ietf.org/html/rfc4180">RFC-4180</a>. As a result, backslashes
 * are not a valid escape character. If you use double-quotes to enclose fields in the CSV data, you must escape internal double-quote marks by prepending
 * another double-quote. </blockquote>
 * <p>
 * The {@link Builder} settings are the {@link #DEFAULT} <em>with</em>:
 * </p>
 * <ul>
 * <li>{@link Builder#setDelimiter(char) setDelimiter}{@code (',')}</li>
 * <li>{@link Builder#setEscape(char) setEscape}{@code ('"')}</li>
 * <li>{@link Builder#setQuote(char) setQuote}{@code ('"')}</li>
 * <li>{@link Builder#setQuoteMode(QuoteMode) setQuoteMode}{@code (QuoteMode.MINIMAL)}</li>
 * </ul>
 *
 * @see Predefined#MongoDBCsv
 * @see QuoteMode#MINIMAL
 * @see <a href="https://docs.mongodb.com/manual/reference/program/mongoexport/">MongoDB mongoexport command documentation</a>
 * @since 1.7
 */
// @formatter:off
public static final CSVFormat MONGODB_CSV = DEFAULT.builder()
        .setDelimiter(Constants.COMMA)
        .setEscape(Constants.DOUBLE_QUOTE_CHAR)
        .setQuote(Constants.DOUBLE_QUOTE_CHAR)
        .setQuoteMode(QuoteMode.MINIMAL)
        .get();
// @formatter:on
1178
/**
 * Default MongoDB TSV format used by the {@code mongoexport} operation.
 * <p>
 * <strong>Parsing is not supported yet.</strong>
 * </p>
 *
 * <p>
 * This is a tab-delimited format. Values are double quoted only if needed and special
 * characters are escaped with {@code '"'}. A header line with field names is expected.
 * </p>
 *
 * <p>
 * The {@link Builder} settings are the {@link #DEFAULT} <em>with</em>:
 * </p>
 * <ul>
 * <li>{@link Builder#setDelimiter(char) setDelimiter}{@code ('\t')}</li>
 * <li>{@link Builder#setEscape(char) setEscape}{@code ('"')}</li>
 * <li>{@link Builder#setQuote(char) setQuote}{@code ('"')}</li>
 * <li>{@link Builder#setQuoteMode(QuoteMode) setQuoteMode}{@code (QuoteMode.MINIMAL)}</li>
 * <li>{@link Builder#setSkipHeaderRecord(boolean) setSkipHeaderRecord}{@code (false)}</li>
 * </ul>
 *
 * @see Predefined#MongoDBTsv
 * @see QuoteMode#MINIMAL
 * @see <a href="https://docs.mongodb.com/manual/reference/program/mongoexport/">MongoDB mongoexport command
 *      documentation</a>
 * @since 1.7
 */
// @formatter:off
public static final CSVFormat MONGODB_TSV = DEFAULT.builder()
        .setDelimiter(Constants.TAB)
        .setEscape(Constants.DOUBLE_QUOTE_CHAR)
        .setQuote(Constants.DOUBLE_QUOTE_CHAR)
        .setQuoteMode(QuoteMode.MINIMAL)
        .setSkipHeaderRecord(false)
        .get();
// @formatter:on
1216
/**
 * Default <a href="https://dev.mysql.com/doc/refman/8.0/en/mysqldump-delimited-text.html">MySQL</a>
 * format used by the {@code SELECT INTO OUTFILE} and {@code LOAD DATA INFILE} operations.
 *
 * <p>
 * This is a tab-delimited format with an LF character as the line separator. Values are not quoted and special
 * characters are escaped with {@code '\'}. The default NULL string is {@code "\\N"}.
 * </p>
 *
 * <p>
 * The {@link Builder} settings are the {@link #DEFAULT} <em>with</em>:
 * </p>
 * <ul>
 * <li>{@link Builder#setDelimiter(char) setDelimiter}{@code ('\t')}</li>
 * <li>{@link Builder#setEscape(char) setEscape}{@code ('\\')}</li>
 * <li>{@link Builder#setIgnoreEmptyLines(boolean) setIgnoreEmptyLines}{@code (false)}</li>
 * <li>{@link Builder#setQuote(Character) setQuote}{@code (null)}</li>
 * <li>{@link Builder#setRecordSeparator(char) setRecordSeparator}{@code ('\n')}</li>
 * <li>{@link Builder#setNullString(String) setNullString}{@code ("\\N")}</li>
 * <li>{@link Builder#setQuoteMode(QuoteMode) setQuoteMode}{@code (QuoteMode.ALL_NON_NULL)}</li>
 * </ul>
 *
 * @see Predefined#MySQL
 * @see QuoteMode#ALL_NON_NULL
 * @see <a href="https://dev.mysql.com/doc/refman/8.0/en/mysqldump-delimited-text.html">MySQL</a>
 */
// @formatter:off
public static final CSVFormat MYSQL = DEFAULT.builder()
        .setDelimiter(Constants.TAB)
        .setEscape(Constants.BACKSLASH)
        .setIgnoreEmptyLines(false)
        .setQuote(null)
        .setRecordSeparator(Constants.LF)
        .setNullString(Constants.SQL_NULL_STRING)
        .setQuoteMode(QuoteMode.ALL_NON_NULL)
        .get();
// @formatter:on
1254
/**
 * Default
 * <a href="https://docs.oracle.com/en/database/oracle/oracle-database/23/sutil/oracle-sql-loader-control-file-contents.html#GUID-D1762699-8154-40F6-90DE-EFB8EB6A9AB0">Oracle</a>
 * format used by the SQL*Loader utility.
 *
 * <p>
 * This is a comma-delimited format with the system line separator character as the record separator. Values are
 * double quoted when needed and special characters are escaped with {@code '\'}. The default NULL string is
 * {@code "\\N"}. Values are trimmed.
 * </p>
 *
 * <p>
 * The {@link Builder} settings are the {@link #DEFAULT} <em>with</em>:
 * </p>
 * <ul>
 * <li>{@link Builder#setDelimiter(char) setDelimiter}{@code (',')} // default is {@code FIELDS TERMINATED BY ','}</li>
 * <li>{@link Builder#setEscape(char) setEscape}{@code ('\\')}</li>
 * <li>{@link Builder#setIgnoreEmptyLines(boolean) setIgnoreEmptyLines}{@code (false)}</li>
 * <li>{@link Builder#setQuote(char) setQuote}{@code ('"')} // default is {@code OPTIONALLY ENCLOSED BY '"'}</li>
 * <li>{@link Builder#setNullString(String) setNullString}{@code ("\\N")}</li>
 * <li>{@link Builder#setTrim(boolean) setTrim}{@code (true)}</li>
 * <li>{@link Builder#setRecordSeparator(String) setRecordSeparator}{@code (System.lineSeparator())}</li>
 * <li>{@link Builder#setQuoteMode(QuoteMode) setQuoteMode}{@code (QuoteMode.MINIMAL)}</li>
 * </ul>
 *
 * @see Predefined#Oracle
 * @see QuoteMode#MINIMAL
 * @see <a href="https://docs.oracle.com/en/database/oracle/oracle-database/23/sutil/oracle-sql-loader-control-file-contents.html#GUID-D1762699-8154-40F6-90DE-EFB8EB6A9AB0">Oracle CSV Format Specification</a>
 * @since 1.6
 */
// @formatter:off
public static final CSVFormat ORACLE = DEFAULT.builder()
        .setDelimiter(Constants.COMMA)
        .setEscape(Constants.BACKSLASH)
        .setIgnoreEmptyLines(false)
        .setQuote(Constants.DOUBLE_QUOTE_CHAR)
        .setNullString(Constants.SQL_NULL_STRING)
        .setTrim(true)
        .setRecordSeparator(System.lineSeparator())
        .setQuoteMode(QuoteMode.MINIMAL)
        .get();
// @formatter:on
1297
/**
 * Default <a href="https://www.postgresql.org/docs/current/static/sql-copy.html">PostgreSQL CSV</a> format used by the {@code COPY} operation.
 *
 * <p>
 * This is a comma-delimited format with an LF character as the line separator. Values are double quoted and special
 * characters are not escaped. The default NULL string is {@code ""}.
 * </p>
 *
 * <p>
 * The {@link Builder} settings are the {@link #DEFAULT} <em>with</em>:
 * </p>
 * <ul>
 * <li>{@link Builder#setDelimiter(char) setDelimiter}{@code (',')}</li>
 * <li>{@link Builder#setEscape(Character) setEscape}{@code (null)}</li>
 * <li>{@link Builder#setIgnoreEmptyLines(boolean) setIgnoreEmptyLines}{@code (false)}</li>
 * <li>{@link Builder#setQuote(char) setQuote}{@code ('"')}</li>
 * <li>{@link Builder#setRecordSeparator(char) setRecordSeparator}{@code ('\n')}</li>
 * <li>{@link Builder#setNullString(String) setNullString}{@code ("")}</li>
 * <li>{@link Builder#setQuoteMode(QuoteMode) setQuoteMode}{@code (QuoteMode.ALL_NON_NULL)}</li>
 * </ul>
 *
 * @see Predefined#PostgreSQLCsv
 * @see QuoteMode#ALL_NON_NULL
 * @see <a href="https://www.postgresql.org/docs/current/static/sql-copy.html">PostgreSQL CSV</a>
 * @since 1.5
 */
// @formatter:off
public static final CSVFormat POSTGRESQL_CSV = DEFAULT.builder()
        .setDelimiter(Constants.COMMA)
        .setEscape(null)
        .setIgnoreEmptyLines(false)
        .setQuote(Constants.DOUBLE_QUOTE_CHAR)
        .setRecordSeparator(Constants.LF)
        .setNullString(Constants.EMPTY)
        .setQuoteMode(QuoteMode.ALL_NON_NULL)
        .get();
// @formatter:on
1335
/**
 * Default <a href="https://www.postgresql.org/docs/current/static/sql-copy.html">PostgreSQL Text</a> format used by the {@code COPY} operation.
 *
 * <p>
 * This is a tab-delimited format with an LF character as the line separator. Values are not quoted and special
 * characters are escaped with {@code '\\'}. The default NULL string is {@code "\\N"}.
 * </p>
 *
 * <p>
 * The {@link Builder} settings are the {@link #DEFAULT} <em>with</em>:
 * </p>
 * <ul>
 * <li>{@link Builder#setDelimiter(char) setDelimiter}{@code ('\t')}</li>
 * <li>{@link Builder#setEscape(char) setEscape}{@code ('\\')}</li>
 * <li>{@link Builder#setIgnoreEmptyLines(boolean) setIgnoreEmptyLines}{@code (false)}</li>
 * <li>{@link Builder#setQuote(Character) setQuote}{@code (null)}</li>
 * <li>{@link Builder#setRecordSeparator(char) setRecordSeparator}{@code ('\n')}</li>
 * <li>{@link Builder#setNullString(String) setNullString}{@code ("\\N")}</li>
 * <li>{@link Builder#setQuoteMode(QuoteMode) setQuoteMode}{@code (QuoteMode.ALL_NON_NULL)}</li>
 * </ul>
 *
 * @see Predefined#PostgreSQLText
 * @see QuoteMode#ALL_NON_NULL
 * @see <a href="https://www.postgresql.org/docs/current/static/sql-copy.html">PostgreSQL Text</a>
 * @since 1.5
 */
// @formatter:off
public static final CSVFormat POSTGRESQL_TEXT = DEFAULT.builder()
        .setDelimiter(Constants.TAB)
        .setEscape(Constants.BACKSLASH)
        .setIgnoreEmptyLines(false)
        .setQuote(null)
        .setRecordSeparator(Constants.LF)
        .setNullString(Constants.SQL_NULL_STRING)
        .setQuoteMode(QuoteMode.ALL_NON_NULL)
        .get();
// @formatter:on
1373
/**
 * Comma separated format as defined by <a href="https://tools.ietf.org/html/rfc4180">RFC 4180</a>.
 *
 * <p>
 * The only setting changed relative to {@link #DEFAULT} is {@code setIgnoreEmptyLines(false)}; the remaining entries below are inherited.
 * </p>
 *
 * <p>
 * The {@link Builder} settings are the {@link #DEFAULT} <em>with</em>:
 * </p>
 * <ul>
 * <li>{@link Builder#setDelimiter(char) setDelimiter}{@code (',')}</li>
 * <li>{@link Builder#setQuote(char) setQuote}{@code ('"')}</li>
 * <li>{@link Builder#setRecordSeparator(String) setRecordSeparator}{@code ("\r\n")}</li>
 * <li>{@link Builder#setIgnoreEmptyLines(boolean) setIgnoreEmptyLines}{@code (false)}</li>
 * </ul>
 *
 * @see Predefined#RFC4180
 * @see <a href="https://tools.ietf.org/html/rfc4180">RFC 4180</a>
 */
public static final CSVFormat RFC4180 = DEFAULT.builder().setIgnoreEmptyLines(false).get();
1391
/** Serialization version identifier for this class. */
private static final long serialVersionUID = 2L;
1393
/**
 * Tab-delimited format (<a href="https://en.wikipedia.org/wiki/Tab-separated_values">TDF</a>).
 *
 * <p>
 * Values are separated by a tab character and spaces surrounding values are ignored.
 * </p>
 *
 * <p>
 * The {@link Builder} settings are the {@link #DEFAULT} <em>with</em>:
 * </p>
 * <ul>
 * <li>{@link Builder#setDelimiter(char) setDelimiter}{@code ('\t')}</li>
 * <li>{@link Builder#setIgnoreSurroundingSpaces(boolean) setIgnoreSurroundingSpaces}{@code (true)}</li>
 * </ul>
 *
 * @see Predefined#TDF
 * @see <a href="https://en.wikipedia.org/wiki/Tab-separated_values">TDF</a>
 */
// @formatter:off
public static final CSVFormat TDF = DEFAULT.builder()
        .setDelimiter(Constants.TAB)
        .setIgnoreSurroundingSpaces(true)
        .get();
// @formatter:on
1414
1415 /**
1416 * Null-safe clone of an array.
1417 *
1418 * @param <T> The array element type.
1419 * @param values the source array
1420 * @return the cloned array.
1421 */
1422 @SafeVarargs
1423 static <T> T[] clone(final T... values) {
1424 return values == null ? null : values.clone();
1425 }
1426
1427 /**
1428 * Returns true if the given string contains the search char.
1429 *
1430 * @param source the string to check.
1431 * @param searchCh the character to search.
1432 * @return true if {@code c} contains a line break character
1433 */
1434 private static boolean contains(final String source, final char searchCh) {
1435 return Objects.requireNonNull(source, "source").indexOf(searchCh) >= 0;
1436 }
1437
1438 /**
1439 * Returns true if the given string contains a line break character.
1440 *
1441 * @param source the string to check.
1442 * @return true if {@code c} contains a line break character.
1443 */
1444 private static boolean containsLineBreak(final String source) {
1445 return contains(source, Constants.CR) || contains(source, Constants.LF);
1446 }
1447
1448 /**
1449 * Creates a null-safe copy of the given instance.
1450 *
1451 * @return a copy of the given instance or null if the input is null.
1452 */
1453 static CSVFormat copy(final CSVFormat format) {
1454 return format != null ? format.copy() : null;
1455 }
1456
1457 static boolean isBlank(final String value) {
1458 return value == null || value.trim().isEmpty();
1459 }
1460
1461 /**
1462 * Returns true if the given character is a line break character.
1463 *
1464 * @param c the character to check.
1465 * @return true if {@code c} is a line break character.
1466 */
1467 private static boolean isLineBreak(final char c) {
1468 return c == Constants.LF || c == Constants.CR;
1469 }
1470
1471 /**
1472 * Returns true if the given character is a line break character.
1473 *
1474 * @param c the character to check, may be null.
1475 * @return true if {@code c} is a line break character (and not null).
1476 */
1477 private static boolean isLineBreak(final Character c) {
1478 return c != null && isLineBreak(c.charValue()); // Explicit (un)boxing is intentional
1479 }
1480
1481 /** Same test as in as {@link String#trim()}. */
1482 private static boolean isTrimChar(final char ch) {
1483 return ch <= Constants.SP;
1484 }
1485
1486 /** Same test as in as {@link String#trim()}. */
1487 private static boolean isTrimChar(final CharSequence charSequence, final int pos) {
1488 return isTrimChar(charSequence.charAt(pos));
1489 }
1490
1491 /**
1492 * Creates a new CSV format with the specified delimiter.
1493 *
1494 * <p>
1495 * Use this method if you want to create a CSVFormat from scratch. All fields but the delimiter will be initialized with null/false.
1496 * </p>
1497 *
1498 * @param delimiter the char used for value separation, must not be a line break character
1499 * @return a new CSV format.
1500 * @throws IllegalArgumentException if the delimiter is a line break character
1501 * @see #DEFAULT
1502 * @see #RFC4180
1503 * @see #MYSQL
1504 * @see #EXCEL
1505 * @see #TDF
1506 */
1507 public static CSVFormat newFormat(final char delimiter) {
1508 return new CSVFormat(new Builder().setDelimiter(delimiter));
1509 }
1510
1511 static String[] toStringArray(final Object[] values) {
1512 if (values == null) {
1513 return null;
1514 }
1515 final String[] strings = new String[values.length];
1516 Arrays.setAll(strings, i -> Objects.toString(values[i], null));
1517 return strings;
1518 }
1519
1520 static CharSequence trim(final CharSequence charSequence) {
1521 if (charSequence instanceof String) {
1522 return ((String) charSequence).trim();
1523 }
1524 final int count = charSequence.length();
1525 int len = count;
1526 int pos = 0;
1527
1528 while (pos < len && isTrimChar(charSequence, pos)) {
1529 pos++;
1530 }
1531 while (pos < len && isTrimChar(charSequence, len - 1)) {
1532 len--;
1533 }
1534 return pos > 0 || len < count ? charSequence.subSequence(pos, len) : charSequence;
1535 }
1536
1537 /**
1538 * Gets one of the predefined formats from {@link CSVFormat.Predefined}.
1539 *
1540 * @param format name
1541 * @return one of the predefined formats
1542 * @since 1.2
1543 */
1544 public static CSVFormat valueOf(final String format) {
1545 return CSVFormat.Predefined.valueOf(format).getFormat();
1546 }
1547
/** How duplicate headers are handled. */
private final DuplicateHeaderMode duplicateHeaderMode;

/** Whether missing column names are allowed when parsing the header line. */
private final boolean allowMissingColumnNames;

/** Whether to flush on close. */
private final boolean autoFlush;

/** The comment marker character; {@code null} if commenting is disabled. */
private final Character commentMarker;

/** The string delimiting the values (typically ";", "," or "\t"). */
private final String delimiter;

/** The escape character; {@code null} if escaping is disabled. */
private final Character escapeCharacter;

/** Array of header column names. */
private final String[] headers;

/** Array of header comment lines. */
private final String[] headerComments;

/** Whether empty lines between records are ignored when parsing input. */
private final boolean ignoreEmptyLines;

/** Whether header name case is ignored. */
private final boolean ignoreHeaderCase;

/** Whether leading and trailing spaces around values are ignored. */
private final boolean ignoreSurroundingSpaces;

/** The string to be used for null values. */
private final String nullString;

/** The quote character; {@code null} if quoting is disabled. */
private final Character quoteCharacter;

/** Set to {@code quoteCharacter + nullString + quoteCharacter}. */
private final String quotedNullString;

/** The quote policy for output fields. */
private final QuoteMode quoteMode;

/** The record separator, used for output. */
private final String recordSeparator;

/** Whether to skip the header record. */
private final boolean skipHeaderRecord;

/** Whether reading end-of-file is allowed even when input is malformed, helps Excel compatibility. */
private final boolean lenientEof;

/** Whether reading trailing data is allowed in records, helps Excel compatibility. */
private final boolean trailingData;

/** Whether to add a trailing delimiter. */
private final boolean trailingDelimiter;

/** Whether to trim leading and trailing blanks. */
private final boolean trim;

/** The maximum number of rows to process, excluding the header row. */
private final long maxRows;
1613
/**
 * Creates a format by copying every setting from the given builder, then validates the result.
 *
 * @param builder the source of all settings.
 */
private CSVFormat(final Builder builder) {
    // Boolean switches.
    allowMissingColumnNames = builder.allowMissingColumnNames;
    autoFlush = builder.autoFlush;
    ignoreEmptyLines = builder.ignoreEmptyLines;
    ignoreHeaderCase = builder.ignoreHeaderCase;
    ignoreSurroundingSpaces = builder.ignoreSurroundingSpaces;
    lenientEof = builder.lenientEof;
    skipHeaderRecord = builder.skipHeaderRecord;
    trailingData = builder.trailingData;
    trailingDelimiter = builder.trailingDelimiter;
    trim = builder.trim;
    // Characters and strings.
    commentMarker = builder.commentMarker;
    delimiter = builder.delimiter;
    escapeCharacter = builder.escapeCharacter;
    nullString = builder.nullString;
    quoteCharacter = builder.quoteCharacter;
    quotedNullString = builder.quotedNullString;
    recordSeparator = builder.recordSeparator;
    // Headers.
    headerComments = builder.headerComments;
    headers = builder.headers;
    // Modes and limits.
    duplicateHeaderMode = builder.duplicateHeaderMode;
    quoteMode = builder.quoteMode;
    maxRows = builder.maxRows;
    // Must run last: every field has to be assigned before validation.
    validate();
}
1639
/**
 * Appends a single character to the given target.
 *
 * @param c          the character to append.
 * @param appendable the target to append to.
 * @throws IOException if the target fails to append.
 */
private void append(final char c, final Appendable appendable) throws IOException {
    appendable.append(c);
}
1647
/**
 * Appends a character sequence to the given target.
 *
 * @param csq        the character sequence to append.
 * @param appendable the target to append to.
 * @throws IOException if the target fails to append.
 */
private void append(final CharSequence csq, final Appendable appendable) throws IOException {
    appendable.append(csq);
}
1655
/**
 * Creates a new Builder initialized from this instance.
 *
 * <p>
 * This is how the predefined formats are derived, for example {@code DEFAULT.builder().setIgnoreEmptyLines(false).get()}.
 * </p>
 *
 * @return a new Builder.
 */
public Builder builder() {
    return Builder.create(this);
}
1664
/**
 * Creates a copy of this instance by round-tripping it through a new {@link Builder}.
 *
 * @return a copy of this instance.
 */
CSVFormat copy() {
    return builder().get();
}
1673
1674 @Override
1675 public boolean equals(final Object obj) {
1676 if (this == obj) {
1677 return true;
1678 }
1679 if (obj == null) {
1680 return false;
1681 }
1682 if (getClass() != obj.getClass()) {
1683 return false;
1684 }
1685 final CSVFormat other = (CSVFormat) obj;
1686 return allowMissingColumnNames == other.allowMissingColumnNames && autoFlush == other.autoFlush &&
1687 Objects.equals(commentMarker, other.commentMarker) && Objects.equals(delimiter, other.delimiter) &&
1688 duplicateHeaderMode == other.duplicateHeaderMode && Objects.equals(escapeCharacter, other.escapeCharacter) &&
1689 Arrays.equals(headerComments, other.headerComments) && Arrays.equals(headers, other.headers) &&
1690 ignoreEmptyLines == other.ignoreEmptyLines && ignoreHeaderCase == other.ignoreHeaderCase &&
1691 ignoreSurroundingSpaces == other.ignoreSurroundingSpaces && lenientEof == other.lenientEof &&
1692 Objects.equals(nullString, other.nullString) && Objects.equals(quoteCharacter, other.quoteCharacter) &&
1693 quoteMode == other.quoteMode && Objects.equals(quotedNullString, other.quotedNullString) &&
1694 Objects.equals(recordSeparator, other.recordSeparator) && skipHeaderRecord == other.skipHeaderRecord &&
1695 trailingData == other.trailingData && trailingDelimiter == other.trailingDelimiter && trim == other.trim;
1696 }
1697
1698 private void escape(final char c, final Appendable appendable) throws IOException {
1699 append(escapeCharacter.charValue(), appendable); // Explicit (un)boxing is intentional
1700 append(c, appendable);
1701 }
1702
/**
 * Formats the specified values as a CSV record string.
 *
 * <p>
 * The work is delegated to a private helper that may throw {@link IOException}; the call goes through {@code Uncheck.get}, so this method declares no
 * checked exception.
 * </p>
 *
 * @param values the values to format.
 * @return the formatted values.
 */
public String format(final Object... values) {
    return Uncheck.get(() -> format_(values));
}
1712
1713 private String format_(final Object... values) throws IOException {
1714 final StringWriter out = new StringWriter();
1715 try (CSVPrinter csvPrinter = new CSVPrinter(out, this)) {
1716 csvPrinter.printRecord(values);
1717 final String res = out.toString();
1718 final int len = recordSeparator != null ? res.length() - recordSeparator.length() : res.length();
1719 return res.substring(0, len);
1720 }
1721 }
1722
1723 /**
1724 * Gets whether duplicate names are allowed in the headers.
1725 *
1726 * @return whether duplicate header names are allowed
1727 * @since 1.7
1728 * @deprecated Use {@link #getDuplicateHeaderMode()}.
1729 */
1730 @Deprecated
1731 public boolean getAllowDuplicateHeaderNames() {
1732 return duplicateHeaderMode == DuplicateHeaderMode.ALLOW_ALL;
1733 }
1734
/**
 * Gets whether missing column names are allowed when parsing the header line.
 *
 * @return {@code true} if missing column names are allowed when parsing the header line, {@code false} to throw an {@link IllegalArgumentException}.
 * @see Builder#setAllowMissingColumnNames(boolean)
 */
public boolean getAllowMissingColumnNames() {
    return allowMissingColumnNames;
}
1743
/**
 * Gets whether to flush on close.
 *
 * @return {@code true} to flush on close, {@code false} otherwise.
 * @since 1.6
 */
public boolean getAutoFlush() {
    return autoFlush;
}
1753
/**
 * Gets the comment marker character, {@code null} disables comments.
 * <p>
 * The comment start character is only recognized at the start of a line.
 * </p>
 * <p>
 * Comments are printed first, before headers.
 * </p>
 * <p>
 * Use {@link Builder#setCommentMarker(char)} or {@link Builder#setCommentMarker(Character)} to set the comment marker written at the start of each comment
 * line.
 * </p>
 * <p>
 * If the comment marker is not set, then the header comments are ignored.
 * </p>
 * <p>
 * For example:
 * </p>
 *
 * <pre>
 * builder.setCommentMarker('#').setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0));
 * </pre>
 * <p>
 * writes:
 * </p>
 *
 * <pre>
 * # Generated by Apache Commons CSV
 * # 1970-01-01T00:00:00Z
 * </pre>
 *
 * @return the comment start marker, may be {@code null}
 */
public Character getCommentMarker() {
    return commentMarker;
}
1790
/**
 * Gets the first character delimiting the values (typically ';', ',' or '\t').
 *
 * <p>
 * The delimiter is stored as a string; this method returns only its first character.
 * </p>
 *
 * @return the first delimiter character.
 * @deprecated Use {@link #getDelimiterString()}.
 */
@Deprecated
public char getDelimiter() {
    return delimiter.charAt(0);
}
1801
/**
 * Gets the delimiter (typically ";", "," or "\t") as a character array.
 *
 * @return a new array holding the characters of the delimiter string.
 */
char[] getDelimiterCharArray() {
    return delimiter.toCharArray();
}
1810
/**
 * Gets the string delimiting the values (typically ";", "," or "\t").
 *
 * @return the delimiter.
 * @since 1.9.0
 */
public String getDelimiterString() {
    return delimiter;
}
1820
/**
 * Gets how duplicate headers are handled.
 *
 * @return the mode stating whether duplicate header values are allowed, allowed conditionally, or disallowed.
 * @since 1.10.0
 */
public DuplicateHeaderMode getDuplicateHeaderMode() {
    return duplicateHeaderMode;
}
1830
1831 /**
1832 * Gets the escape character.
1833 *
1834 * @return the escape character, may be {@code 0}
1835 */
1836 char getEscapeChar() {
1837 return escapeCharacter != null ? escapeCharacter.charValue() : 0; // Explicit (un)boxing is intentional
1838 }
1839
/**
 * Gets the escape character.
 *
 * @return the escape character, {@code null} if escaping is disabled.
 */
public Character getEscapeCharacter() {
    return escapeCharacter;
}
1848
1849 /**
1850 * Gets a copy of the header array.
1851 *
1852 * @return a copy of the header array; {@code null} if disabled, the empty array if to be read from the file
1853 */
1854 public String[] getHeader() {
1855 return headers != null ? headers.clone() : null;
1856 }
1857
1858 /**
1859 * Gets a copy of the header comment array to write before the CSV data.
1860 * <p>
1861 * This setting is ignored by the parser.
1862 * </p>
1863 * <p>
1864 * Comments are printed first, before headers.
1865 * </p>
1866 * <p>
1867 * Use {@link Builder#setCommentMarker(char)} or {@link Builder#setCommentMarker(Character)} to set the comment marker written at the start of each comment
1868 * line.
1869 * </p>
1870 * <p>
1871 * If the comment marker is not set, then the header comments are ignored.
1872 * </p>
1873 * <p>
1874 * For example:
1875 * </p>
1876 *
1877 * <pre>
1878 * builder.setCommentMarker('#').setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0));
1879 * </pre>
1880 * <p>
1881 * writes:
1882 * </p>
1883 *
1884 * <pre>
1885 * # Generated by Apache Commons CSV.
1886 * # 1970-01-01T00:00:00Z
1887 * </pre>
1888 *
1889 * @return a copy of the header comment array; {@code null} if disabled.
1890 */
1891 public String[] getHeaderComments() {
1892 return headerComments != null ? headerComments.clone() : null;
1893 }
1894
/**
 * Gets whether empty lines between records are ignored when parsing input.
 *
 * @return {@code true} if empty lines between records are ignored, {@code false} if they are turned into empty records.
 * @see Builder#setIgnoreEmptyLines(boolean)
 */
public boolean getIgnoreEmptyLines() {
    return ignoreEmptyLines;
}
1903
1904 /**
1905 * Gets whether header names will be accessed ignoring case when parsing input.
1906 *
1907 * @return {@code true} if header names cases are ignored, {@code false} if they are case-sensitive.
1908 * @since 1.3
1909 */
1910 public boolean getIgnoreHeaderCase() {
1911 return ignoreHeaderCase;
1912 }
1913
1914 /**
1915 * Gets whether spaces around values are ignored when parsing input.
1916 *
1917 * @return {@code true} if spaces around values are ignored, {@code false} if they are treated as part of the value.
1918 */
1919 public boolean getIgnoreSurroundingSpaces() {
1920 return ignoreSurroundingSpaces;
1921 }
1922
1923 /**
1924 * Gets whether reading end-of-file is allowed even when input is malformed, helps Excel compatibility.
1925 *
1926 * @return whether reading end-of-file is allowed even when input is malformed, helps Excel compatibility.
1927 * @since 1.11.0
1928 */
1929 public boolean getLenientEof() {
1930 return lenientEof;
1931 }
1932
1933 /**
1934 * Gets the maximum number of rows to process, excluding the header row.
1935 * <p>
1936 * Values less than or equal to 0 mean no limit.
1937 * </p>
1938 *
1939 * @return The maximum number of rows to process, excluding the header row.
1940 * @since 1.14.0
1941 */
1942 public long getMaxRows() {
1943 return maxRows;
1944 }
1945
1946 /**
1947 * Gets the String to convert to and from {@code null}.
1948 * <ul>
1949 * <li><strong>Reading:</strong> Converts strings equal to the given {@code nullString} to {@code null} when reading records.</li>
1950 * <li><strong>Writing:</strong> Writes {@code null} as the given {@code nullString} when writing records.</li>
1951 * </ul>
1952 *
1953 * @return the String to convert to and from {@code null}. No substitution occurs if {@code null}
1954 */
1955 public String getNullString() {
1956 return nullString;
1957 }
1958
1959 /**
1960 * Gets the character used to encapsulate values containing special characters.
1961 *
1962 * @return the quoteChar character, may be {@code null}
1963 */
1964 public Character getQuoteCharacter() {
1965 return quoteCharacter;
1966 }
1967
1968 /**
1969 * Gets the quote policy output fields.
1970 *
1971 * @return the quote policy
1972 */
1973 public QuoteMode getQuoteMode() {
1974 return quoteMode;
1975 }
1976
1977 /**
1978 * Gets the record separator delimiting output records.
1979 *
1980 * @return the record separator
1981 */
1982 public String getRecordSeparator() {
1983 return recordSeparator;
1984 }
1985
1986 /**
1987 * Gets whether to skip the header record.
1988 *
1989 * @return whether to skip the header record.
1990 */
1991 public boolean getSkipHeaderRecord() {
1992 return skipHeaderRecord;
1993 }
1994
1995 /**
1996 * Gets whether reading trailing data is allowed in records, helps Excel compatibility.
1997 *
1998 * @return whether reading trailing data is allowed in records, helps Excel compatibility.
1999 * @since 1.11.0
2000 */
2001 public boolean getTrailingData() {
2002 return trailingData;
2003 }
2004
2005 /**
2006 * Gets whether to add a trailing delimiter.
2007 *
2008 * @return whether to add a trailing delimiter.
2009 * @since 1.3
2010 */
2011 public boolean getTrailingDelimiter() {
2012 return trailingDelimiter;
2013 }
2014
2015 /**
2016 * Gets whether to trim leading and trailing blanks. This is used by {@link #print(Object, Appendable, boolean)} Also by {CSVParser#addRecordValue(boolean)}
2017 *
2018 * @return whether to trim leading and trailing blanks.
2019 */
2020 public boolean getTrim() {
2021 return trim;
2022 }
2023
2024 @Override
2025 public int hashCode() {
2026 final int prime = 31;
2027 int result = 1;
2028 result = prime * result + Arrays.hashCode(headerComments);
2029 result = prime * result + Arrays.hashCode(headers);
2030 return prime * result + Objects.hash(allowMissingColumnNames, autoFlush, commentMarker, delimiter, duplicateHeaderMode, escapeCharacter,
2031 ignoreEmptyLines, ignoreHeaderCase, ignoreSurroundingSpaces, lenientEof, nullString, quoteCharacter, quoteMode, quotedNullString,
2032 recordSeparator, skipHeaderRecord, trailingData, trailingDelimiter, trim);
2033 }
2034
2035 /**
2036 * Tests whether comments are supported by this format.
2037 *
2038 * Note that the comment introducer character is only recognized at the start of a line.
2039 *
2040 * @return {@code true} is comments are supported, {@code false} otherwise
2041 */
2042 public boolean isCommentMarkerSet() {
2043 return commentMarker != null;
2044 }
2045
2046 /**
2047 * Tests whether the next characters constitute a delimiter
2048 *
2049 * @param ch0 the first char (index 0).
2050 * @param charSeq the match char sequence
2051 * @param startIndex where start to match
2052 * @param delimiter the delimiter
2053 * @param delimiterLength the delimiter length
2054 * @return true if the match is successful
2055 */
2056 private boolean isDelimiter(final char ch0, final CharSequence charSeq, final int startIndex, final char[] delimiter, final int delimiterLength) {
2057 if (ch0 != delimiter[0]) {
2058 return false;
2059 }
2060 final int len = charSeq.length();
2061 if (startIndex + delimiterLength > len) {
2062 return false;
2063 }
2064 for (int i = 1; i < delimiterLength; i++) {
2065 if (charSeq.charAt(startIndex + i) != delimiter[i]) {
2066 return false;
2067 }
2068 }
2069 return true;
2070 }
2071
2072 /**
2073 * Tests whether escapes are being processed.
2074 *
2075 * @return {@code true} if escapes are processed
2076 */
2077 public boolean isEscapeCharacterSet() {
2078 return escapeCharacter != null;
2079 }
2080
2081 /**
2082 * Tests whether a null string has been defined.
2083 *
2084 * @return {@code true} if a nullString is defined
2085 */
2086 public boolean isNullStringSet() {
2087 return nullString != null;
2088 }
2089
2090 /**
2091 * Tests whether a quoteChar has been defined.
2092 *
2093 * @return {@code true} if a quoteChar is defined
2094 */
2095 public boolean isQuoteCharacterSet() {
2096 return quoteCharacter != null;
2097 }
2098
2099 <T> IOStream<T> limit(final IOStream<T> stream) {
2100 return useMaxRows() ? stream.limit(getMaxRows()) : stream;
2101 }
2102
2103 /**
2104 * Parses the specified content.
2105 *
2106 * <p>
2107 * See also the various static parse methods on {@link CSVParser}.
2108 * </p>
2109 *
2110 * @param reader the input stream
2111 * @return a parser over a stream of {@link CSVRecord}s.
2112 * @throws IOException If an I/O error occurs
2113 * @throws CSVException Thrown on invalid input.
2114 */
2115 public CSVParser parse(final Reader reader) throws IOException {
2116 return CSVParser.builder().setReader(reader).setFormat(this).get();
2117 }
2118
2119 /**
2120 * Prints to the specified output.
2121 *
2122 * <p>
2123 * See also {@link CSVPrinter}.
2124 * </p>
2125 *
2126 * @param out the output.
2127 * @return a printer to an output.
2128 * @throws IOException thrown if the optional header cannot be printed.
2129 */
2130 public CSVPrinter print(final Appendable out) throws IOException {
2131 return new CSVPrinter(out, this);
2132 }
2133
2134 /**
2135 * Prints to the specified {@code File} with given {@code Charset}.
2136 *
2137 * <p>
2138 * See also {@link CSVPrinter}.
2139 * </p>
2140 *
2141 * @param out the output.
2142 * @param charset A charset.
2143 * @return a printer to an output.
2144 * @throws IOException thrown if the optional header cannot be printed.
2145 * @since 1.5
2146 */
2147 public CSVPrinter print(final File out, final Charset charset) throws IOException {
2148 return print(out.toPath(), charset);
2149 }
2150
2151 private void print(final InputStream inputStream, final Appendable out, final boolean newRecord) throws IOException {
2152 // InputStream is never null here
2153 // There is nothing to escape when quoting is used which is the default.
2154 if (!newRecord) {
2155 append(getDelimiterString(), out);
2156 }
2157 final boolean quoteCharacterSet = isQuoteCharacterSet();
2158 if (quoteCharacterSet) {
2159 append(getQuoteCharacter().charValue(), out); // Explicit (un)boxing is intentional
2160 }
2161 // Stream the input to the output without reading or holding the whole value in memory.
2162 // AppendableOutputStream cannot "close" an Appendable.
2163 try (OutputStream outputStream = new Base64OutputStream(new AppendableOutputStream<>(out))) {
2164 IOUtils.copy(inputStream, outputStream);
2165 }
2166 if (quoteCharacterSet) {
2167 append(getQuoteCharacter().charValue(), out); // Explicit (un)boxing is intentional
2168 }
2169 }
2170
2171 /**
2172 * Prints the {@code value} as the next value on the line to {@code out}. The value will be escaped or encapsulated as needed. Useful when one wants to
2173 * avoid creating CSVPrinters. Trims the value if {@link #getTrim()} is true.
2174 *
2175 * @param value value to output.
2176 * @param out where to print the value.
2177 * @param newRecord if this a new record.
2178 * @throws IOException If an I/O error occurs.
2179 * @since 1.4
2180 */
2181 public synchronized void print(final Object value, final Appendable out, final boolean newRecord) throws IOException {
2182 // null values are considered empty
2183 // Only call CharSequence.toString() if you have to, helps GC-free use cases.
2184 CharSequence charSequence;
2185 if (value == null) {
2186 // https://issues.apache.org/jira/browse/CSV-203
2187 if (null == nullString) {
2188 charSequence = Constants.EMPTY;
2189 } else if (QuoteMode.ALL == quoteMode) {
2190 charSequence = quotedNullString;
2191 } else {
2192 charSequence = nullString;
2193 }
2194 } else if (value instanceof CharSequence) {
2195 charSequence = (CharSequence) value;
2196 } else if (value instanceof Reader) {
2197 print((Reader) value, out, newRecord);
2198 return;
2199 } else if (value instanceof InputStream) {
2200 print((InputStream) value, out, newRecord);
2201 return;
2202 } else {
2203 charSequence = value.toString();
2204 }
2205 charSequence = getTrim() ? trim(charSequence) : charSequence;
2206 print(value, charSequence, out, newRecord);
2207 }
2208
2209 private synchronized void print(final Object object, final CharSequence value, final Appendable out, final boolean newRecord) throws IOException {
2210 final int offset = 0;
2211 final int len = value.length();
2212 if (!newRecord) {
2213 out.append(getDelimiterString());
2214 }
2215 if (object == null) {
2216 out.append(value);
2217 } else if (isQuoteCharacterSet()) {
2218 // The original object is needed so can check for Number
2219 printWithQuotes(object, value, out, newRecord);
2220 } else if (isEscapeCharacterSet()) {
2221 printWithEscapes(value, out);
2222 } else {
2223 out.append(value, offset, len);
2224 }
2225 }
2226
2227 /**
2228 * Prints to the specified {@code Path} with given {@code Charset}, returns a {@code CSVPrinter} which the caller MUST close.
2229 *
2230 * <p>
2231 * See also {@link CSVPrinter}.
2232 * </p>
2233 *
2234 * @param out the output.
2235 * @param charset A charset.
2236 * @return a printer to an output.
2237 * @throws IOException thrown if the optional header cannot be printed.
2238 * @since 1.5
2239 */
2240 @SuppressWarnings("resource")
2241 public CSVPrinter print(final Path out, final Charset charset) throws IOException {
2242 return print(Files.newBufferedWriter(out, charset));
2243 }
2244
2245 private void print(final Reader reader, final Appendable out, final boolean newRecord) throws IOException {
2246 // Reader is never null here
2247 if (!newRecord) {
2248 append(getDelimiterString(), out);
2249 }
2250 if (isQuoteCharacterSet()) {
2251 printWithQuotes(reader, out);
2252 } else if (isEscapeCharacterSet()) {
2253 printWithEscapes(reader, out);
2254 } else if (out instanceof Writer) {
2255 IOUtils.copyLarge(reader, (Writer) out);
2256 } else {
2257 IOUtils.copy(reader, out);
2258 }
2259 }
2260
2261 /**
2262 * Prints to the {@link System#out}.
2263 *
2264 * <p>
2265 * See also {@link CSVPrinter}.
2266 * </p>
2267 *
2268 * @return a printer to {@link System#out}.
2269 * @throws IOException thrown if the optional header cannot be printed.
2270 * @since 1.5
2271 */
2272 public CSVPrinter printer() throws IOException {
2273 return new CSVPrinter(System.out, this);
2274 }
2275
2276 /**
2277 * Outputs the trailing delimiter (if set) followed by the record separator (if set).
2278 *
2279 * @param appendable where to write
2280 * @throws IOException If an I/O error occurs.
2281 * @since 1.4
2282 */
2283 public synchronized void println(final Appendable appendable) throws IOException {
2284 if (getTrailingDelimiter()) {
2285 append(getDelimiterString(), appendable);
2286 }
2287 if (recordSeparator != null) {
2288 append(recordSeparator, appendable);
2289 }
2290 }
2291
2292 /**
2293 * Prints the given {@code values} to {@code out} as a single record of delimiter-separated values followed by the record separator.
2294 *
2295 * <p>
2296 * The values will be quoted if needed. Quotes and new-line characters will be escaped. This method adds the record separator to the output after printing
2297 * the record, so there is no need to call {@link #println(Appendable)}.
2298 * </p>
2299 *
2300 * @param appendable where to write.
2301 * @param values values to output.
2302 * @throws IOException If an I/O error occurs.
2303 * @since 1.4
2304 */
2305 public synchronized void printRecord(final Appendable appendable, final Object... values) throws IOException {
2306 for (int i = 0; i < values.length; i++) {
2307 print(values[i], appendable, i == 0);
2308 }
2309 println(appendable);
2310 }
2311
2312 /*
2313 * This method must only be called if escaping is enabled, otherwise can throw exceptions.
2314 */
2315 private void printWithEscapes(final CharSequence charSeq, final Appendable appendable) throws IOException {
2316 int start = 0;
2317 int pos = 0;
2318 final int end = charSeq.length();
2319 final char[] delimArray = getDelimiterCharArray();
2320 final int delimLength = delimArray.length;
2321 final char escape = getEscapeChar();
2322 while (pos < end) {
2323 char c = charSeq.charAt(pos);
2324 final boolean isDelimiterStart = isDelimiter(c, charSeq, pos, delimArray, delimLength);
2325 final boolean isCr = c == Constants.CR;
2326 final boolean isLf = c == Constants.LF;
2327 if (isCr || isLf || c == escape || isDelimiterStart) {
2328 // write out segment up until this char
2329 if (pos > start) {
2330 appendable.append(charSeq, start, pos);
2331 }
2332 if (isLf) {
2333 c = 'n';
2334 } else if (isCr) {
2335 c = 'r';
2336 }
2337 escape(c, appendable);
2338 if (isDelimiterStart) {
2339 for (int i = 1; i < delimLength; i++) {
2340 pos++;
2341 escape(charSeq.charAt(pos), appendable);
2342 }
2343 }
2344 start = pos + 1; // start on the current char after this one
2345 }
2346 pos++;
2347 }
2348
2349 // write last segment
2350 if (pos > start) {
2351 appendable.append(charSeq, start, pos);
2352 }
2353 }
2354
2355 /*
2356 * This method must only be called if escaping is enabled, otherwise can throw exceptions.
2357 */
2358 private void printWithEscapes(final Reader reader, final Appendable appendable) throws IOException {
2359 int start = 0;
2360 int pos = 0;
2361 @SuppressWarnings("resource") // Temp reader on input reader.
2362 final ExtendedBufferedReader bufferedReader = new ExtendedBufferedReader(reader);
2363 final char[] delimArray = getDelimiterCharArray();
2364 final int delimLength = delimArray.length;
2365 final char escape = getEscapeChar();
2366 final StringBuilder builder = new StringBuilder(IOUtils.DEFAULT_BUFFER_SIZE);
2367 int c;
2368 final char[] lookAheadBuffer = new char[delimLength - 1];
2369 while (EOF != (c = bufferedReader.read())) {
2370 builder.append((char) c);
2371 Arrays.fill(lookAheadBuffer, (char) 0);
2372 bufferedReader.peek(lookAheadBuffer);
2373 final String test = builder.toString() + new String(lookAheadBuffer);
2374 final boolean isDelimiterStart = isDelimiter((char) c, test, pos, delimArray, delimLength);
2375 final boolean isCr = c == Constants.CR;
2376 final boolean isLf = c == Constants.LF;
2377 if (isCr || isLf || c == escape || isDelimiterStart) {
2378 // write out segment up until this char
2379 if (pos > start) {
2380 append(builder.substring(start, pos), appendable);
2381 builder.setLength(0);
2382 pos = -1;
2383 }
2384 if (isLf) {
2385 c = 'n';
2386 } else if (isCr) {
2387 c = 'r';
2388 }
2389 escape((char) c, appendable);
2390 if (isDelimiterStart) {
2391 for (int i = 1; i < delimLength; i++) {
2392 escape((char) bufferedReader.read(), appendable);
2393 }
2394 }
2395 start = pos + 1; // start on the current char after this one
2396 }
2397 pos++;
2398 }
2399 // write last segment
2400 if (pos > start) {
2401 appendable.append(builder, start, pos);
2402 }
2403 }
2404
2405 /*
2406 * This method must only be called if quoting is enabled, otherwise will generate NPE.
2407 * The original object is needed so can check for Number
2408 */
2409 private void printWithQuotes(final Object object, final CharSequence charSeq, final Appendable out, final boolean newRecord) throws IOException {
2410 boolean quote = false;
2411 int start = 0;
2412 int pos = 0;
2413 final int len = charSeq.length();
2414 final char[] delim = getDelimiterCharArray();
2415 final int delimLength = delim.length;
2416 final char quoteChar = getQuoteCharacter().charValue(); // Explicit (un)boxing is intentional
2417 // If escape char not specified, default to the quote char
2418 // This avoids having to keep checking whether there is an escape character
2419 // at the cost of checking against quote twice
2420 final char escapeChar = isEscapeCharacterSet() ? getEscapeChar() : quoteChar;
2421 QuoteMode quoteModePolicy = getQuoteMode();
2422 if (quoteModePolicy == null) {
2423 quoteModePolicy = QuoteMode.MINIMAL;
2424 }
2425 switch (quoteModePolicy) {
2426 case ALL:
2427 case ALL_NON_NULL:
2428 quote = true;
2429 break;
2430 case NON_NUMERIC:
2431 quote = !(object instanceof Number);
2432 break;
2433 case NONE:
2434 // Use the existing escaping code
2435 printWithEscapes(charSeq, out);
2436 return;
2437 case MINIMAL:
2438 if (len <= 0) {
2439 // Always quote an empty token that is the first
2440 // on the line, as it may be the only thing on the
2441 // line. If it were not quoted in that case,
2442 // an empty line has no tokens.
2443 if (newRecord) {
2444 quote = true;
2445 }
2446 } else {
2447 char c = charSeq.charAt(pos);
2448 if (c <= Constants.COMMENT) {
2449 // Some other chars at the start of a value caused the parser to fail, so for now
2450 // encapsulate if we start in anything less than '#'. We are being conservative
2451 // by including the default comment char too.
2452 quote = true;
2453 } else {
2454 while (pos < len) {
2455 c = charSeq.charAt(pos);
2456 if (c == Constants.LF || c == Constants.CR || c == quoteChar || c == escapeChar || isDelimiter(c, charSeq, pos, delim, delimLength)) {
2457 quote = true;
2458 break;
2459 }
2460 pos++;
2461 }
2462
2463 if (!quote) {
2464 pos = len - 1;
2465 c = charSeq.charAt(pos);
2466 // Some other chars at the end caused the parser to fail, so for now
2467 // encapsulate if we end in anything less than ' '
2468 if (isTrimChar(c)) {
2469 quote = true;
2470 }
2471 }
2472 }
2473 }
2474 if (!quote) {
2475 // No encapsulation needed - write out the original value
2476 out.append(charSeq, start, len);
2477 return;
2478 }
2479 break;
2480 default:
2481 throw new IllegalStateException("Unexpected Quote value: " + quoteModePolicy);
2482 }
2483 if (!quote) {
2484 // No encapsulation needed - write out the original value
2485 out.append(charSeq, start, len);
2486 return;
2487 }
2488 // We hit something that needed encapsulation
2489 out.append(quoteChar);
2490 // Pick up where we left off: pos should be positioned on the first character that caused
2491 // the need for encapsulation.
2492 while (pos < len) {
2493 final char c = charSeq.charAt(pos);
2494 if (c == quoteChar || c == escapeChar) {
2495 // write out the chunk up until this point
2496 out.append(charSeq, start, pos);
2497 out.append(escapeChar); // now output the escape
2498 start = pos; // and restart with the matched char
2499 }
2500 pos++;
2501 }
2502 // Write the last segment
2503 out.append(charSeq, start, pos);
2504 out.append(quoteChar);
2505 }
2506
2507 /**
2508 * Always use quotes unless QuoteMode is NONE, so we do not have to look ahead.
2509 *
2510 * @param reader What to print
2511 * @param appendable Where to print it
2512 * @throws IOException If an I/O error occurs
2513 */
2514 private void printWithQuotes(final Reader reader, final Appendable appendable) throws IOException {
2515 if (getQuoteMode() == QuoteMode.NONE) {
2516 printWithEscapes(reader, appendable);
2517 return;
2518 }
2519 final char quote = getQuoteCharacter().charValue(); // Explicit (un)boxing is intentional
2520 // (1) Append opening quote
2521 append(quote, appendable);
2522 // (2) Append Reader contents, doubling quotes
2523 int c;
2524 while (EOF != (c = reader.read())) {
2525 append((char) c, appendable);
2526 if (c == quote) {
2527 append(quote, appendable);
2528 }
2529 }
2530 // (3) Append closing quote
2531 append(quote, appendable);
2532 }
2533
2534 @Override
2535 public String toString() {
2536 final StringBuilder sb = new StringBuilder();
2537 sb.append("Delimiter=<").append(delimiter).append('>');
2538 if (isEscapeCharacterSet()) {
2539 sb.append(Constants.SP);
2540 sb.append("Escape=<").append(escapeCharacter).append('>');
2541 }
2542 if (isQuoteCharacterSet()) {
2543 sb.append(Constants.SP);
2544 sb.append("QuoteChar=<").append(quoteCharacter).append('>');
2545 }
2546 if (quoteMode != null) {
2547 sb.append(Constants.SP);
2548 sb.append("QuoteMode=<").append(quoteMode).append('>');
2549 }
2550 if (isCommentMarkerSet()) {
2551 sb.append(Constants.SP);
2552 sb.append("CommentStart=<").append(commentMarker).append('>');
2553 }
2554 if (isNullStringSet()) {
2555 sb.append(Constants.SP);
2556 sb.append("NullString=<").append(nullString).append('>');
2557 }
2558 if (recordSeparator != null) {
2559 sb.append(Constants.SP);
2560 sb.append("RecordSeparator=<").append(recordSeparator).append('>');
2561 }
2562 if (getIgnoreEmptyLines()) {
2563 sb.append(" EmptyLines:ignored");
2564 }
2565 if (getIgnoreSurroundingSpaces()) {
2566 sb.append(" SurroundingSpaces:ignored");
2567 }
2568 if (getIgnoreHeaderCase()) {
2569 sb.append(" IgnoreHeaderCase:ignored");
2570 }
2571 sb.append(" SkipHeaderRecord:").append(skipHeaderRecord);
2572 if (headerComments != null) {
2573 sb.append(Constants.SP);
2574 sb.append("HeaderComments:").append(Arrays.toString(headerComments));
2575 }
2576 if (headers != null) {
2577 sb.append(Constants.SP);
2578 sb.append("Header:").append(Arrays.toString(headers));
2579 }
2580 return sb.toString();
2581 }
2582
2583 String trim(final String value) {
2584 return getTrim() ? value.trim() : value;
2585 }
2586
2587 boolean useMaxRows() {
2588 return getMaxRows() > 0;
2589 }
2590
2591 boolean useRow(final long rowNum) {
2592 return !useMaxRows() || rowNum <= getMaxRows();
2593 }
2594
2595 /**
2596 * Verifies the validity and consistency of the attributes, and throws an {@link IllegalArgumentException} if necessary.
2597 * <p>
2598 * Because an instance can be used for both writing and parsing, not all conditions can be tested here. For example, allowMissingColumnNames is only used
2599 * for parsing, so it cannot be used here.
2600 * </p>
2601 *
2602 * @throws IllegalArgumentException Throw when any attribute is invalid or inconsistent with other attributes.
2603 */
2604 private void validate() throws IllegalArgumentException {
2605 if (quoteCharacter != null && contains(delimiter, quoteCharacter.charValue())) { // Explicit (un)boxing is intentional
2606 throw new IllegalArgumentException("The quoteChar character and the delimiter cannot be the same ('" + quoteCharacter + "')");
2607 }
2608 if (escapeCharacter != null && contains(delimiter, escapeCharacter.charValue())) { // Explicit (un)boxing is intentional
2609 throw new IllegalArgumentException("The escape character and the delimiter cannot be the same ('" + escapeCharacter + "')");
2610 }
2611 if (commentMarker != null && contains(delimiter, commentMarker.charValue())) { // Explicit (un)boxing is intentional
2612 throw new IllegalArgumentException("The comment start character and the delimiter cannot be the same ('" + commentMarker + "')");
2613 }
2614 if (quoteCharacter != null && quoteCharacter.equals(commentMarker)) {
2615 throw new IllegalArgumentException("The comment start character and the quoteChar cannot be the same ('" + commentMarker + "')");
2616 }
2617 if (escapeCharacter != null && escapeCharacter.equals(commentMarker)) {
2618 throw new IllegalArgumentException("The comment start and the escape character cannot be the same ('" + commentMarker + "')");
2619 }
2620 if (escapeCharacter == null && quoteMode == QuoteMode.NONE) {
2621 throw new IllegalArgumentException("Quote mode set to NONE but no escape character is set");
2622 }
2623 // Validate headers
2624 if (headers != null && duplicateHeaderMode != DuplicateHeaderMode.ALLOW_ALL) {
2625 final Set<String> dupCheckSet = new HashSet<>(headers.length);
2626 final boolean emptyDuplicatesAllowed = duplicateHeaderMode == DuplicateHeaderMode.ALLOW_EMPTY;
2627 for (final String header : headers) {
2628 final boolean blank = isBlank(header);
2629 // Sanitize all empty headers to the empty string "" when checking duplicates
2630 final boolean containsHeader = !dupCheckSet.add(blank ? "" : header);
2631 if (containsHeader && !(blank && emptyDuplicatesAllowed)) {
2632 throw new IllegalArgumentException(String.format(
2633 "The header contains a duplicate name: \"%s\" in %s. If this is valid then use CSVFormat.Builder.setDuplicateHeaderMode().", header,
2634 Arrays.toString(headers)));
2635 }
2636 }
2637 }
2638 }
2639
2640 /**
2641 * Builds a new {@code CSVFormat} that allows duplicate header names.
2642 *
2643 * @return a new {@code CSVFormat} that allows duplicate header names
2644 * @since 1.7
2645 * @deprecated Use {@link Builder#setAllowDuplicateHeaderNames(boolean) Builder#setAllowDuplicateHeaderNames(true)}
2646 */
2647 @Deprecated
2648 public CSVFormat withAllowDuplicateHeaderNames() {
2649 return builder().setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL).get();
2650 }
2651
2652 /**
2653 * Builds a new {@code CSVFormat} with duplicate header names behavior set to the given value.
2654 *
2655 * @param allowDuplicateHeaderNames the duplicate header names behavior, true to allow, false to disallow.
2656 * @return a new {@code CSVFormat} with duplicate header names behavior set to the given value.
2657 * @since 1.7
2658 * @deprecated Use {@link Builder#setAllowDuplicateHeaderNames(boolean)}
2659 */
2660 @Deprecated
2661 public CSVFormat withAllowDuplicateHeaderNames(final boolean allowDuplicateHeaderNames) {
2662 final DuplicateHeaderMode mode = allowDuplicateHeaderNames ? DuplicateHeaderMode.ALLOW_ALL : DuplicateHeaderMode.ALLOW_EMPTY;
2663 return builder().setDuplicateHeaderMode(mode).get();
2664 }
2665
2666 /**
2667 * Builds a new {@code CSVFormat} with the missing column names behavior of the format set to {@code true}.
2668 *
2669 * @return A new CSVFormat that is equal to this but with the specified missing column names behavior.
2670 * @see Builder#setAllowMissingColumnNames(boolean)
2671 * @since 1.1
2672 * @deprecated Use {@link Builder#setAllowMissingColumnNames(boolean) Builder#setAllowMissingColumnNames(true)}
2673 */
2674 @Deprecated
2675 public CSVFormat withAllowMissingColumnNames() {
2676 return builder().setAllowMissingColumnNames(true).get();
2677 }
2678
2679 /**
2680 * Builds a new {@code CSVFormat} with the missing column names behavior of the format set to the given value.
2681 *
2682 * @param allowMissingColumnNames the missing column names behavior, {@code true} to allow missing column names in the header line, {@code false} to cause
2683 * an {@link IllegalArgumentException} to be thrown.
2684 * @return A new CSVFormat that is equal to this but with the specified missing column names behavior.
2685 * @deprecated Use {@link Builder#setAllowMissingColumnNames(boolean)}
2686 */
2687 @Deprecated
2688 public CSVFormat withAllowMissingColumnNames(final boolean allowMissingColumnNames) {
2689 return builder().setAllowMissingColumnNames(allowMissingColumnNames).get();
2690 }
2691
2692 /**
2693 * Builds a new {@code CSVFormat} with whether to flush on close.
2694 *
2695 * @param autoFlush whether to flush on close.
2696 * @return A new CSVFormat that is equal to this but with the specified autoFlush setting.
2697 * @since 1.6
2698 * @deprecated Use {@link Builder#setAutoFlush(boolean)}
2699 */
2700 @Deprecated
2701 public CSVFormat withAutoFlush(final boolean autoFlush) {
2702 return builder().setAutoFlush(autoFlush).get();
2703 }
2704
2705 /**
2706 * Builds a new {@code CSVFormat} with the comment start marker of the format set to the specified character.
2707 *
2708 * Note that the comment start character is only recognized at the start of a line.
2709 *
2710 * @param commentMarker the comment start marker
2711 * @return A new CSVFormat that is equal to this one but with the specified character as the comment start marker
2712 * @throws IllegalArgumentException thrown if the specified character is a line break
2713 * @deprecated Use {@link Builder#setCommentMarker(char)}
2714 */
2715 @Deprecated
2716 public CSVFormat withCommentMarker(final char commentMarker) {
2717 return builder().setCommentMarker(commentMarker).get();
2718 }
2719
2720 /**
2721 * Builds a new {@code CSVFormat} with the comment start marker of the format set to the specified character.
2722 *
2723 * Note that the comment start character is only recognized at the start of a line.
2724 *
2725 * @param commentMarker the comment start marker, use {@code null} to disable
2726 * @return A new CSVFormat that is equal to this one but with the specified character as the comment start marker
2727 * @throws IllegalArgumentException thrown if the specified character is a line break
2728 * @deprecated Use {@link Builder#setCommentMarker(Character)}
2729 */
2730 @Deprecated
2731 public CSVFormat withCommentMarker(final Character commentMarker) {
2732 return builder().setCommentMarker(commentMarker).get();
2733 }
2734
2735 /**
2736 * Builds a new {@code CSVFormat} with the delimiter of the format set to the specified character.
2737 *
2738 * @param delimiter the delimiter character
2739 * @return A new CSVFormat that is equal to this with the specified character as a delimiter
2740 * @throws IllegalArgumentException thrown if the specified character is a line break
2741 * @deprecated Use {@link Builder#setDelimiter(char)}
2742 */
2743 @Deprecated
2744 public CSVFormat withDelimiter(final char delimiter) {
2745 return builder().setDelimiter(delimiter).get();
2746 }
2747
2748 /**
2749 * Builds a new {@code CSVFormat} with the escape character of the format set to the specified character.
2750 *
2751 * @param escape the escape character
2752 * @return A new CSVFormat that is equal to this but with the specified character as the escape character
2753 * @throws IllegalArgumentException thrown if the specified character is a line break
2754 * @deprecated Use {@link Builder#setEscape(char)}
2755 */
2756 @Deprecated
2757 public CSVFormat withEscape(final char escape) {
2758 return builder().setEscape(escape).get();
2759 }
2760
2761 /**
2762 * Builds a new {@code CSVFormat} with the escape character of the format set to the specified character.
2763 *
2764 * @param escape the escape character, use {@code null} to disable
2765 * @return A new CSVFormat that is equal to this but with the specified character as the escape character
2766 * @throws IllegalArgumentException thrown if the specified character is a line break
2767 * @deprecated Use {@link Builder#setEscape(Character)}
2768 */
2769 @Deprecated
2770 public CSVFormat withEscape(final Character escape) {
2771 return builder().setEscape(escape).get();
2772 }
2773
2774 // @formatter:off
2775 /**
2776 * Builds a new {@code CSVFormat} using the first record as header.
2777 *
2778 * <p>
2779 * Calling this method is equivalent to calling:
2780 * </p>
2781 *
2782 * <pre>
2783 * CSVFormat format = aFormat.builder()
2784 * .setHeader()
2785 * .setSkipHeaderRecord(true)
2786 * .get();
2787 * </pre>
2788 *
2789 * @return A new CSVFormat that is equal to this but using the first record as header.
2790 * @see Builder#setSkipHeaderRecord(boolean)
2791 * @see Builder#setHeader(String...)
2792 * @since 1.3
2793 * @deprecated Use {@link Builder#setHeader(String...) Builder#setHeader()}.{@link Builder#setSkipHeaderRecord(boolean) setSkipHeaderRecord(true)}.
2794 */
2795 // @formatter:on
2796 @Deprecated
2797 public CSVFormat withFirstRecordAsHeader() {
2798 // @formatter:off
2799 return builder()
2800 .setHeader()
2801 .setSkipHeaderRecord(true)
2802 .get();
2803 // @formatter:on
2804 }
2805
2806 /**
2807 * Builds a new {@code CSVFormat} with the header of the format defined by the enum class.
2808 *
2809 * <p>
2810 * Example:
2811 * </p>
2812 *
2813 * <pre>
2814 * public enum MyHeader {
2815 * Name, Email, Phone
2816 * }
2817 * ...
2818 * CSVFormat format = aFormat.builder().setHeader(MyHeader.class).get();
2819 * </pre>
2820 * <p>
2821 * The header is also used by the {@link CSVPrinter}.
2822 * </p>
2823 *
2824 * @param headerEnum the enum defining the header, {@code null} if disabled, empty if parsed automatically, user specified otherwise.
2825 * @return A new CSVFormat that is equal to this but with the specified header
2826 * @see Builder#setHeader(String...)
2827 * @see Builder#setSkipHeaderRecord(boolean)
2828 * @since 1.3
2829 * @deprecated Use {@link Builder#setHeader(Class)}
2830 */
2831 @Deprecated
2832 public CSVFormat withHeader(final Class<? extends Enum<?>> headerEnum) {
2833 return builder().setHeader(headerEnum).get();
2834 }
2835
2836 /**
2837 * Builds a new {@code CSVFormat} with the header of the format set from the result set metadata. The header can either be parsed automatically from the
2838 * input file with:
2839 *
2840 * <pre>
2841 * CSVFormat format = aFormat.builder().setHeader().get();
2842 * </pre>
2843 *
2844 * or specified manually with:
2845 *
2846 * <pre>
2847 * CSVFormat format = aFormat.builder().setHeader(resultSet).get();
2848 * </pre>
2849 * <p>
2850 * The header is also used by the {@link CSVPrinter}.
2851 * </p>
2852 *
2853 * @param resultSet the resultSet for the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise.
2854 * @return A new CSVFormat that is equal to this but with the specified header
2855 * @throws SQLException SQLException if a database access error occurs or this method is called on a closed result set.
2856 * @since 1.1
2857 * @deprecated Use {@link Builder#setHeader(ResultSet)}
2858 */
2859 @Deprecated
2860 public CSVFormat withHeader(final ResultSet resultSet) throws SQLException {
2861 return builder().setHeader(resultSet).get();
2862 }
2863
2864 /**
2865 * Builds a new {@code CSVFormat} with the header of the format set from the result set metadata. The header can either be parsed automatically from the
2866 * input file with:
2867 *
2868 * <pre>
2869 * CSVFormat format = aFormat.builder().setHeader().get()
2870 * </pre>
2871 *
2872 * or specified manually with:
2873 *
2874 * <pre>
2875 * CSVFormat format = aFormat.builder().setHeader(resultSetMetaData).get()
2876 * </pre>
2877 * <p>
2878 * The header is also used by the {@link CSVPrinter}.
2879 * </p>
2880 *
2881 * @param resultSetMetaData the metaData for the header, {@code null} if disabled, empty if parsed automatically, user specified otherwise.
2882 * @return A new CSVFormat that is equal to this but with the specified header
2883 * @throws SQLException SQLException if a database access error occurs or this method is called on a closed result set.
2884 * @since 1.1
2885 * @deprecated Use {@link Builder#setHeader(ResultSetMetaData)}
2886 */
2887 @Deprecated
2888 public CSVFormat withHeader(final ResultSetMetaData resultSetMetaData) throws SQLException {
2889 return builder().setHeader(resultSetMetaData).get();
2890 }
2891
2892 /**
2893 * Builds a new {@code CSVFormat} with the header of the format set to the given values. The header can either be parsed automatically from the input file
2894 * with:
2895 *
2896 * <pre>
2897 * CSVFormat format = aFormat.builder().setHeader().get();
2898 * </pre>
2899 *
2900 * or specified manually with:
2901 *
2902 * <pre>{@code
2903 * CSVFormat format = aFormat.builder().setHeader("name", "email", "phone").get();
2904 * }</pre>
2905 * <p>
2906 * The header is also used by the {@link CSVPrinter}.
2907 * </p>
2908 *
2909 * @param header the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise.
2910 * @return A new CSVFormat that is equal to this but with the specified header
2911 * @see Builder#setSkipHeaderRecord(boolean)
2912 * @deprecated Use {@link Builder#setHeader(String...)}
2913 */
2914 @Deprecated
2915 public CSVFormat withHeader(final String... header) {
2916 return builder().setHeader(header).get();
2917 }
2918
2919 /**
2920 * Builds a new {@code CSVFormat} with the header comments of the format set to the given values. The comments will be printed first, before the headers.
2921 * This setting is ignored by the parser.
2922 *
2923 * <pre>{@code
2924 * CSVFormat format = aFormat.builder().setHeaderComments("Generated by Apache Commons CSV.", Instant.now()).get();
2925 * }</pre>
2926 *
2927 * @param headerComments the headerComments which will be printed by the Printer before the actual CSV data.
2928 * @return A new CSVFormat that is equal to this but with the specified header
2929 * @see Builder#setSkipHeaderRecord(boolean)
2930 * @since 1.1
2931 * @deprecated Use {@link Builder#setHeaderComments(Object...)}
2932 */
2933 @Deprecated
2934 public CSVFormat withHeaderComments(final Object... headerComments) {
2935 return builder().setHeaderComments(headerComments).get();
2936 }
2937
2938 /**
2939 * Builds a new {@code CSVFormat} with the empty line skipping behavior of the format set to {@code true}.
2940 *
2941 * @return A new CSVFormat that is equal to this but with the specified empty line skipping behavior.
2942 * @see Builder#setIgnoreEmptyLines(boolean)
2943 * @since 1.1
2944 * @deprecated Use {@link Builder#setIgnoreEmptyLines(boolean) Builder#setIgnoreEmptyLines(true)}
2945 */
2946 @Deprecated
2947 public CSVFormat withIgnoreEmptyLines() {
2948 return builder().setIgnoreEmptyLines(true).get();
2949 }
2950
2951 /**
2952 * Builds a new {@code CSVFormat} with the empty line skipping behavior of the format set to the given value.
2953 *
2954 * @param ignoreEmptyLines the empty line skipping behavior, {@code true} to ignore the empty lines between the records, {@code false} to translate empty
2955 * lines to empty records.
2956 * @return A new CSVFormat that is equal to this but with the specified empty line skipping behavior.
2957 * @deprecated Use {@link Builder#setIgnoreEmptyLines(boolean)}
2958 */
2959 @Deprecated
2960 public CSVFormat withIgnoreEmptyLines(final boolean ignoreEmptyLines) {
2961 return builder().setIgnoreEmptyLines(ignoreEmptyLines).get();
2962 }
2963
2964 /**
2965 * Builds a new {@code CSVFormat} with the header ignore case behavior set to {@code true}.
2966 *
2967 * @return A new CSVFormat that will ignore the new case header name behavior.
2968 * @see Builder#setIgnoreHeaderCase(boolean)
2969 * @since 1.3
2970 * @deprecated Use {@link Builder#setIgnoreHeaderCase(boolean) Builder#setIgnoreHeaderCase(true)}
2971 */
2972 @Deprecated
2973 public CSVFormat withIgnoreHeaderCase() {
2974 return builder().setIgnoreHeaderCase(true).get();
2975 }
2976
2977 /**
2978 * Builds a new {@code CSVFormat} with whether header names should be accessed ignoring case.
2979 *
2980 * @param ignoreHeaderCase the case mapping behavior, {@code true} to access name/values, {@code false} to leave the mapping as is.
2981 * @return A new CSVFormat that will ignore case header name if specified as {@code true}
2982 * @since 1.3
2983 * @deprecated Use {@link Builder#setIgnoreHeaderCase(boolean)}
2984 */
2985 @Deprecated
2986 public CSVFormat withIgnoreHeaderCase(final boolean ignoreHeaderCase) {
2987 return builder().setIgnoreHeaderCase(ignoreHeaderCase).get();
2988 }
2989
2990 /**
2991 * Builds a new {@code CSVFormat} with the parser trimming behavior of the format set to {@code true}.
2992 *
2993 * @return A new CSVFormat that is equal to this but with the specified parser trimming behavior.
2994 * @see Builder#setIgnoreSurroundingSpaces(boolean)
2995 * @since 1.1
2996 * @deprecated Use {@link Builder#setIgnoreSurroundingSpaces(boolean) Builder#setIgnoreSurroundingSpaces(true)}
2997 */
2998 @Deprecated
2999 public CSVFormat withIgnoreSurroundingSpaces() {
3000 return builder().setIgnoreSurroundingSpaces(true).get();
3001 }
3002
3003 /**
3004 * Builds a new {@code CSVFormat} with the parser trimming behavior of the format set to the given value.
3005 *
3006 * @param ignoreSurroundingSpaces the parser trimming behavior, {@code true} to remove the surrounding spaces, {@code false} to leave the spaces as is.
3007 * @return A new CSVFormat that is equal to this but with the specified trimming behavior.
3008 * @deprecated Use {@link Builder#setIgnoreSurroundingSpaces(boolean)}
3009 */
3010 @Deprecated
3011 public CSVFormat withIgnoreSurroundingSpaces(final boolean ignoreSurroundingSpaces) {
3012 return builder().setIgnoreSurroundingSpaces(ignoreSurroundingSpaces).get();
3013 }
3014
3015 /**
3016 * Builds a new {@code CSVFormat} with conversions to and from null for strings on input and output.
3017 * <ul>
3018 * <li><strong>Reading:</strong> Converts strings equal to the given {@code nullString} to {@code null} when reading records.</li>
3019 * <li><strong>Writing:</strong> Writes {@code null} as the given {@code nullString} when writing records.</li>
3020 * </ul>
3021 *
3022 * @param nullString the String to convert to and from {@code null}. No substitution occurs if {@code null}
3023 * @return A new CSVFormat that is equal to this but with the specified null conversion string.
3024 * @deprecated Use {@link Builder#setNullString(String)}
3025 */
3026 @Deprecated
3027 public CSVFormat withNullString(final String nullString) {
3028 return builder().setNullString(nullString).get();
3029 }
3030
3031 /**
3032 * Builds a new {@code CSVFormat} with the quoteChar of the format set to the specified character.
3033 *
3034 * @param quoteChar the quote character
3035 * @return A new CSVFormat that is equal to this but with the specified character as quoteChar
3036 * @throws IllegalArgumentException thrown if the specified character is a line break
3037 * @deprecated Use {@link Builder#setQuote(char)}
3038 */
3039 @Deprecated
3040 public CSVFormat withQuote(final char quoteChar) {
3041 return builder().setQuote(quoteChar).get();
3042 }
3043
3044 /**
3045 * Builds a new {@code CSVFormat} with the quoteChar of the format set to the specified character.
3046 *
3047 * @param quoteChar the quote character, use {@code null} to disable.
3048 * @return A new CSVFormat that is equal to this but with the specified character as quoteChar
3049 * @throws IllegalArgumentException thrown if the specified character is a line break
3050 * @deprecated Use {@link Builder#setQuote(Character)}
3051 */
3052 @Deprecated
3053 public CSVFormat withQuote(final Character quoteChar) {
3054 return builder().setQuote(quoteChar).get();
3055 }
3056
3057 /**
3058 * Builds a new {@code CSVFormat} with the output quote policy of the format set to the specified value.
3059 *
3060 * @param quoteMode the quote policy to use for output.
3061 * @return A new CSVFormat that is equal to this but with the specified quote policy
3062 * @deprecated Use {@link Builder#setQuoteMode(QuoteMode)}
3063 */
3064 @Deprecated
3065 public CSVFormat withQuoteMode(final QuoteMode quoteMode) {
3066 return builder().setQuoteMode(quoteMode).get();
3067 }
3068
3069 /**
3070 * Builds a new {@code CSVFormat} with the record separator of the format set to the specified character.
3071 *
3072 * <p>
3073 * <strong>Note:</strong> This setting is only used during printing and does not affect parsing. Parsing currently only works for inputs with '\n', '\r' and
3074 * "\r\n"
3075 * </p>
3076 *
3077 * @param recordSeparator the record separator to use for output.
3078 * @return A new CSVFormat that is equal to this but with the specified output record separator
3079 * @deprecated Use {@link Builder#setRecordSeparator(char)}
3080 */
3081 @Deprecated
3082 public CSVFormat withRecordSeparator(final char recordSeparator) {
3083 return builder().setRecordSeparator(recordSeparator).get();
3084 }
3085
3086 /**
3087 * Builds a new {@code CSVFormat} with the record separator of the format set to the specified String.
3088 *
3089 * <p>
3090 * <strong>Note:</strong> This setting is only used during printing and does not affect parsing. Parsing currently only works for inputs with '\n', '\r' and
3091 * "\r\n"
3092 * </p>
3093 *
3094 * @param recordSeparator the record separator to use for output.
3095 * @return A new CSVFormat that is equal to this but with the specified output record separator
3096 * @throws IllegalArgumentException if recordSeparator is none of CR, LF or CRLF
3097 * @deprecated Use {@link Builder#setRecordSeparator(String)}
3098 */
3099 @Deprecated
3100 public CSVFormat withRecordSeparator(final String recordSeparator) {
3101 return builder().setRecordSeparator(recordSeparator).get();
3102 }
3103
3104 /**
3105 * Builds a new {@code CSVFormat} with skipping the header record set to {@code true}.
3106 *
3107 * @return A new CSVFormat that is equal to this but with the specified skipHeaderRecord setting.
3108 * @see Builder#setSkipHeaderRecord(boolean)
3109 * @see Builder#setHeader(String...)
3110 * @since 1.1
3111 * @deprecated Use {@link Builder#setSkipHeaderRecord(boolean) Builder#setSkipHeaderRecord(true)}
3112 */
3113 @Deprecated
3114 public CSVFormat withSkipHeaderRecord() {
3115 return builder().setSkipHeaderRecord(true).get();
3116 }
3117
3118 /**
3119 * Builds a new {@code CSVFormat} with whether to skip the header record.
3120 *
3121 * @param skipHeaderRecord whether to skip the header record.
3122 * @return A new CSVFormat that is equal to this but with the specified skipHeaderRecord setting.
3123 * @see Builder#setHeader(String...)
3124 * @deprecated Use {@link Builder#setSkipHeaderRecord(boolean)}
3125 */
3126 @Deprecated
3127 public CSVFormat withSkipHeaderRecord(final boolean skipHeaderRecord) {
3128 return builder().setSkipHeaderRecord(skipHeaderRecord).get();
3129 }
3130
3131 /**
3132 * Builds a new {@code CSVFormat} with the record separator of the format set to the operating system's line separator string, typically CR+LF on Windows
3133 * and LF on Linux.
3134 *
3135 * <p>
3136 * <strong>Note:</strong> This setting is only used during printing and does not affect parsing. Parsing currently only works for inputs with '\n', '\r' and
3137 * "\r\n"
3138 * </p>
3139 *
3140 * @return A new CSVFormat that is equal to this but with the operating system's line separator string.
3141 * @since 1.6
3142 * @deprecated Use {@link Builder#setRecordSeparator(String) setRecordSeparator(System.lineSeparator())}
3143 */
3144 @Deprecated
3145 public CSVFormat withSystemRecordSeparator() {
3146 return builder().setRecordSeparator(System.lineSeparator()).get();
3147 }
3148
3149 /**
3150 * Builds a new {@code CSVFormat} to add a trailing delimiter.
3151 *
3152 * @return A new CSVFormat that is equal to this but with the trailing delimiter setting.
3153 * @since 1.3
3154 * @deprecated Use {@link Builder#setTrailingDelimiter(boolean) Builder#setTrailingDelimiter(true)}
3155 */
3156 @Deprecated
3157 public CSVFormat withTrailingDelimiter() {
3158 return builder().setTrailingDelimiter(true).get();
3159 }
3160
3161 /**
3162 * Builds a new {@code CSVFormat} with whether to add a trailing delimiter.
3163 *
3164 * @param trailingDelimiter whether to add a trailing delimiter.
3165 * @return A new CSVFormat that is equal to this but with the specified trailing delimiter setting.
3166 * @since 1.3
3167 * @deprecated Use {@link Builder#setTrailingDelimiter(boolean)}
3168 */
3169 @Deprecated
3170 public CSVFormat withTrailingDelimiter(final boolean trailingDelimiter) {
3171 return builder().setTrailingDelimiter(trailingDelimiter).get();
3172 }
3173
3174 /**
3175 * Builds a new {@code CSVFormat} to trim leading and trailing blanks. See {@link #getTrim()} for details of where this is used.
3176 *
3177 * @return A new CSVFormat that is equal to this but with the trim setting on.
3178 * @since 1.3
3179 * @deprecated Use {@link Builder#setTrim(boolean) Builder#setTrim(true)}
3180 */
3181 @Deprecated
3182 public CSVFormat withTrim() {
3183 return builder().setTrim(true).get();
3184 }
3185
3186 /**
3187 * Builds a new {@code CSVFormat} with whether to trim leading and trailing blanks. See {@link #getTrim()} for details of where this is used.
3188 *
3189 * @param trim whether to trim leading and trailing blanks.
3190 * @return A new CSVFormat that is equal to this but with the specified trim setting.
3191 * @since 1.3
3192 * @deprecated Use {@link Builder#setTrim(boolean)}
3193 */
3194 @Deprecated
3195 public CSVFormat withTrim(final boolean trim) {
3196 return builder().setTrim(trim).get();
3197 }
3198
3199 }