View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.csv;
19  
20  import static org.apache.commons.csv.Constants.COMMENT;
21  import static org.apache.commons.csv.Constants.CR;
22  import static org.apache.commons.csv.Constants.LF;
23  import static org.apache.commons.csv.Constants.SP;
24  
25  import java.io.Closeable;
26  import java.io.Flushable;
27  import java.io.IOException;
28  import java.sql.ResultSet;
29  import java.sql.SQLException;
30  
31  /**
32   * Prints values in a CSV format.
33   *
34   * @version $Id: CSVPrinter.java 1638700 2014-11-12 03:57:12Z ggregory $
35   */
36  public final class CSVPrinter implements Flushable, Closeable {
37  
38      /** The place that the values get written. */
39      private final Appendable out;
40      private final CSVFormat format;
41  
42      /** True if we just began a new record. */
43      private boolean newRecord = true;
44  
45      /**
46       * Creates a printer that will print values to the given stream following the CSVFormat.
47       * <p>
48       * Currently, only a pure encapsulation format or a pure escaping format is supported. Hybrid formats (encapsulation
49       * and escaping with a different character) are not supported.
50       * </p>
51       *
52       * @param out
53       *            stream to which to print. Must not be null.
54       * @param format
55       *            the CSV format. Must not be null.
56       * @throws IOException
57       *             thrown if the optional header cannot be printed.
58       * @throws IllegalArgumentException
59       *             thrown if the parameters of the format are inconsistent or if either out or format are null.
60       */
61      public CSVPrinter(final Appendable out, final CSVFormat format) throws IOException {
62          Assertions.notNull(out, "out");
63          Assertions.notNull(format, "format");
64  
65          this.out = out;
66          this.format = format;
67          // TODO: Is it a good idea to do this here instead of on the first call to a print method?
68          // It seems a pain to have to track whether the header has already been printed or not.
69          if (format.getHeaderComments() != null) {
70              for (String line : format.getHeaderComments()) {
71                  if (line != null) {
72                      this.printComment(line);
73                  }
74              }
75          }
76          if (format.getHeader() != null) {
77              this.printRecord((Object[]) format.getHeader());
78          }
79      }
80  
81      // ======================================================
82      // printing implementation
83      // ======================================================
84  
85      @Override
86      public void close() throws IOException {
87          if (out instanceof Closeable) {
88              ((Closeable) out).close();
89          }
90      }
91  
92      /**
93       * Flushes the underlying stream.
94       *
95       * @throws IOException
96       *             If an I/O error occurs
97       */
98      @Override
99      public void flush() throws IOException {
100         if (out instanceof Flushable) {
101             ((Flushable) out).flush();
102         }
103     }
104 
105     /**
106      * Prints the string as the next value on the line. The value will be escaped or encapsulated as needed.
107      *
108      * @param value
109      *            value to be output.
110      * @throws IOException
111      *             If an I/O error occurs
112      */
113     public void print(final Object value) throws IOException {
114         // null values are considered empty
115         String strValue;
116         if (value == null) {
117             final String nullString = format.getNullString();
118             strValue = nullString == null ? Constants.EMPTY : nullString;
119         } else {
120             strValue = value.toString();
121         }
122         this.print(value, strValue, 0, strValue.length());
123     }
124 
125     private void print(final Object object, final CharSequence value, final int offset, final int len)
126             throws IOException {
127         if (!newRecord) {
128             out.append(format.getDelimiter());
129         }
130         if (format.isQuoteCharacterSet()) {
131             // the original object is needed so can check for Number
132             printAndQuote(object, value, offset, len);
133         } else if (format.isEscapeCharacterSet()) {
134             printAndEscape(value, offset, len);
135         } else {
136             out.append(value, offset, offset + len);
137         }
138         newRecord = false;
139     }
140 
141     /*
142      * Note: must only be called if escaping is enabled, otherwise will generate NPE
143      */
144     private void printAndEscape(final CharSequence value, final int offset, final int len) throws IOException {
145         int start = offset;
146         int pos = offset;
147         final int end = offset + len;
148 
149         final char delim = format.getDelimiter();
150         final char escape = format.getEscapeCharacter().charValue();
151 
152         while (pos < end) {
153             char c = value.charAt(pos);
154             if (c == CR || c == LF || c == delim || c == escape) {
155                 // write out segment up until this char
156                 if (pos > start) {
157                     out.append(value, start, pos);
158                 }
159                 if (c == LF) {
160                     c = 'n';
161                 } else if (c == CR) {
162                     c = 'r';
163                 }
164 
165                 out.append(escape);
166                 out.append(c);
167 
168                 start = pos + 1; // start on the current char after this one
169             }
170 
171             pos++;
172         }
173 
174         // write last segment
175         if (pos > start) {
176             out.append(value, start, pos);
177         }
178     }
179 
180     /*
181      * Note: must only be called if quoting is enabled, otherwise will generate NPE
182      */
183     // the original object is needed so can check for Number
184     private void printAndQuote(final Object object, final CharSequence value, final int offset, final int len)
185             throws IOException {
186         boolean quote = false;
187         int start = offset;
188         int pos = offset;
189         final int end = offset + len;
190 
191         final char delimChar = format.getDelimiter();
192         final char quoteChar = format.getQuoteCharacter().charValue();
193 
194         QuoteMode quoteModePolicy = format.getQuoteMode();
195         if (quoteModePolicy == null) {
196             quoteModePolicy = QuoteMode.MINIMAL;
197         }
198         switch (quoteModePolicy) {
199         case ALL:
200             quote = true;
201             break;
202         case NON_NUMERIC:
203             quote = !(object instanceof Number);
204             break;
205         case NONE:
206             // Use the existing escaping code
207             printAndEscape(value, offset, len);
208             return;
209         case MINIMAL:
210             if (len <= 0) {
211                 // always quote an empty token that is the first
212                 // on the line, as it may be the only thing on the
213                 // line. If it were not quoted in that case,
214                 // an empty line has no tokens.
215                 if (newRecord) {
216                     quote = true;
217                 }
218             } else {
219                 char c = value.charAt(pos);
220 
221                 // TODO where did this rule come from?
222                 if (newRecord && (c < '0' || (c > '9' && c < 'A') || (c > 'Z' && c < 'a') || (c > 'z'))) {
223                     quote = true;
224                 } else if (c <= COMMENT) {
225                     // Some other chars at the start of a value caused the parser to fail, so for now
226                     // encapsulate if we start in anything less than '#'. We are being conservative
227                     // by including the default comment char too.
228                     quote = true;
229                 } else {
230                     while (pos < end) {
231                         c = value.charAt(pos);
232                         if (c == LF || c == CR || c == quoteChar || c == delimChar) {
233                             quote = true;
234                             break;
235                         }
236                         pos++;
237                     }
238 
239                     if (!quote) {
240                         pos = end - 1;
241                         c = value.charAt(pos);
242                         // Some other chars at the end caused the parser to fail, so for now
243                         // encapsulate if we end in anything less than ' '
244                         if (c <= SP) {
245                             quote = true;
246                         }
247                     }
248                 }
249             }
250 
251             if (!quote) {
252                 // no encapsulation needed - write out the original value
253                 out.append(value, start, end);
254                 return;
255             }
256             break;
257         default:
258             throw new IllegalStateException("Unexpected Quote value: " + quoteModePolicy);
259         }
260 
261         if (!quote) {
262             // no encapsulation needed - write out the original value
263             out.append(value, start, end);
264             return;
265         }
266 
267         // we hit something that needed encapsulation
268         out.append(quoteChar);
269 
270         // Pick up where we left off: pos should be positioned on the first character that caused
271         // the need for encapsulation.
272         while (pos < end) {
273             final char c = value.charAt(pos);
274             if (c == quoteChar) {
275                 // write out the chunk up until this point
276 
277                 // add 1 to the length to write out the encapsulator also
278                 out.append(value, start, pos + 1);
279                 // put the next starting position on the encapsulator so we will
280                 // write it out again with the next string (effectively doubling it)
281                 start = pos;
282             }
283             pos++;
284         }
285 
286         // write the last segment
287         out.append(value, start, pos);
288         out.append(quoteChar);
289     }
290 
291     /**
292      * Prints a comment on a new line among the delimiter separated values.
293      *
294      * <p>
295      * Comments will always begin on a new line and occupy a least one full line. The character specified to start
296      * comments and a space will be inserted at the beginning of each new line in the comment.
297      * </p>
298      *
299      * If comments are disabled in the current CSV format this method does nothing.
300      *
301      * @param comment
302      *            the comment to output
303      * @throws IOException
304      *             If an I/O error occurs
305      */
306     public void printComment(final String comment) throws IOException {
307         if (!format.isCommentMarkerSet()) {
308             return;
309         }
310         if (!newRecord) {
311             println();
312         }
313         out.append(format.getCommentMarker().charValue());
314         out.append(SP);
315         for (int i = 0; i < comment.length(); i++) {
316             final char c = comment.charAt(i);
317             switch (c) {
318             case CR:
319                 if (i + 1 < comment.length() && comment.charAt(i + 1) == LF) {
320                     i++;
321                 }
322                 //$FALL-THROUGH$ break intentionally excluded.
323             case LF:
324                 println();
325                 out.append(format.getCommentMarker().charValue());
326                 out.append(SP);
327                 break;
328             default:
329                 out.append(c);
330                 break;
331             }
332         }
333         println();
334     }
335 
336     /**
337      * Outputs the record separator.
338      *
339      * @throws IOException
340      *             If an I/O error occurs
341      */
342     public void println() throws IOException {
343         final String recordSeparator = format.getRecordSeparator();
344         if (recordSeparator != null) {
345             out.append(recordSeparator);
346         }
347         newRecord = true;
348     }
349 
350     /**
351      * Prints the given values a single record of delimiter separated values followed by the record separator.
352      *
353      * <p>
354      * The values will be quoted if needed. Quotes and newLine characters will be escaped. This method adds the record
355      * separator to the output after printing the record, so there is no need to call {@link #println()}.
356      * </p>
357      *
358      * @param values
359      *            values to output.
360      * @throws IOException
361      *             If an I/O error occurs
362      */
363     public void printRecord(final Iterable<?> values) throws IOException {
364         for (final Object value : values) {
365             print(value);
366         }
367         println();
368     }
369 
370     /**
371      * Prints the given values a single record of delimiter separated values followed by the record separator.
372      *
373      * <p>
374      * The values will be quoted if needed. Quotes and newLine characters will be escaped. This method adds the record
375      * separator to the output after printing the record, so there is no need to call {@link #println()}.
376      * </p>
377      *
378      * @param values
379      *            values to output.
380      * @throws IOException
381      *             If an I/O error occurs
382      */
383     public void printRecord(final Object... values) throws IOException {
384         for (final Object value : values) {
385             print(value);
386         }
387         println();
388     }
389 
390     /**
391      * Prints all the objects in the given collection handling nested collections/arrays as records.
392      *
393      * <p>
394      * If the given collection only contains simple objects, this method will print a single record like
395      * {@link #printRecord(Iterable)}. If the given collections contains nested collections/arrays those nested elements
396      * will each be printed as records using {@link #printRecord(Object...)}.
397      * </p>
398      *
399      * <p>
400      * Given the following data structure:
401      * </p>
402      *
403      * <pre>
404      * <code>
405      * List&lt;String[]&gt; data = ...
406      * data.add(new String[]{ "A", "B", "C" });
407      * data.add(new String[]{ "1", "2", "3" });
408      * data.add(new String[]{ "A1", "B2", "C3" });
409      * </code>
410      * </pre>
411      *
412      * <p>
413      * Calling this method will print:
414      * </p>
415      *
416      * <pre>
417      * <code>
418      * A, B, C
419      * 1, 2, 3
420      * A1, B2, C3
421      * </code>
422      * </pre>
423      *
424      * @param values
425      *            the values to print.
426      * @throws IOException
427      *             If an I/O error occurs
428      */
429     public void printRecords(final Iterable<?> values) throws IOException {
430         for (final Object value : values) {
431             if (value instanceof Object[]) {
432                 this.printRecord((Object[]) value);
433             } else if (value instanceof Iterable) {
434                 this.printRecord((Iterable<?>) value);
435             } else {
436                 this.printRecord(value);
437             }
438         }
439     }
440 
441     /**
442      * Prints all the objects in the given array handling nested collections/arrays as records.
443      *
444      * <p>
445      * If the given array only contains simple objects, this method will print a single record like
446      * {@link #printRecord(Object...)}. If the given collections contains nested collections/arrays those nested
447      * elements will each be printed as records using {@link #printRecord(Object...)}.
448      * </p>
449      *
450      * <p>
451      * Given the following data structure:
452      * </p>
453      *
454      * <pre>
455      * <code>
456      * String[][] data = new String[3][]
457      * data[0] = String[]{ "A", "B", "C" };
458      * data[1] = new String[]{ "1", "2", "3" };
459      * data[2] = new String[]{ "A1", "B2", "C3" };
460      * </code>
461      * </pre>
462      *
463      * <p>
464      * Calling this method will print:
465      * </p>
466      *
467      * <pre>
468      * <code>
469      * A, B, C
470      * 1, 2, 3
471      * A1, B2, C3
472      * </code>
473      * </pre>
474      *
475      * @param values
476      *            the values to print.
477      * @throws IOException
478      *             If an I/O error occurs
479      */
480     public void printRecords(final Object... values) throws IOException {
481         for (final Object value : values) {
482             if (value instanceof Object[]) {
483                 this.printRecord((Object[]) value);
484             } else if (value instanceof Iterable) {
485                 this.printRecord((Iterable<?>) value);
486             } else {
487                 this.printRecord(value);
488             }
489         }
490     }
491 
492     /**
493      * Prints all the objects in the given JDBC result set.
494      *
495      * @param resultSet
496      *            result set the values to print.
497      * @throws IOException
498      *             If an I/O error occurs
499      * @throws SQLException
500      *             if a database access error occurs
501      */
502     public void printRecords(final ResultSet resultSet) throws SQLException, IOException {
503         final int columnCount = resultSet.getMetaData().getColumnCount();
504         while (resultSet.next()) {
505             for (int i = 1; i <= columnCount; i++) {
506                 print(resultSet.getObject(i));
507             }
508             println();
509         }
510     }
511 
512     /**
513      * Gets the target Appendable.
514      *
515      * @return the target Appendable.
516      */
517     public Appendable getOut() {
518         return this.out;
519     }
520 }