View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.csv;
19  
20  import static org.apache.commons.csv.Constants.COMMENT;
21  import static org.apache.commons.csv.Constants.CR;
22  import static org.apache.commons.csv.Constants.LF;
23  import static org.apache.commons.csv.Constants.SP;
24  
25  import java.io.Closeable;
26  import java.io.Flushable;
27  import java.io.IOException;
28  import java.sql.ResultSet;
29  import java.sql.SQLException;
30  
31  /**
32   * Prints values in a CSV format.
33   *
34   * @version $Id: CSVPrinter.java 1742467 2016-05-05 20:00:16Z britter $
35   */
36  public final class CSVPrinter implements Flushable, Closeable {
37  
38      /** The place that the values get written. */
39      private final Appendable out;
40      private final CSVFormat format;
41  
42      /** True if we just began a new record. */
43      private boolean newRecord = true;
44  
45      /**
46       * Creates a printer that will print values to the given stream following the CSVFormat.
47       * <p>
48       * Currently, only a pure encapsulation format or a pure escaping format is supported. Hybrid formats (encapsulation
49       * and escaping with a different character) are not supported.
50       * </p>
51       *
52       * @param out
53       *            stream to which to print. Must not be null.
54       * @param format
55       *            the CSV format. Must not be null.
56       * @throws IOException
57       *             thrown if the optional header cannot be printed.
58       * @throws IllegalArgumentException
59       *             thrown if the parameters of the format are inconsistent or if either out or format are null.
60       */
61      public CSVPrinter(final Appendable out, final CSVFormat format) throws IOException {
62          Assertions.notNull(out, "out");
63          Assertions.notNull(format, "format");
64  
65          this.out = out;
66          this.format = format;
67          // TODO: Is it a good idea to do this here instead of on the first call to a print method?
68          // It seems a pain to have to track whether the header has already been printed or not.
69          if (format.getHeaderComments() != null) {
70              for (final String line : format.getHeaderComments()) {
71                  if (line != null) {
72                      this.printComment(line);
73                  }
74              }
75          }
76          if (format.getHeader() != null && !format.getSkipHeaderRecord()) {
77              this.printRecord((Object[]) format.getHeader());
78          }
79      }
80  
81      // ======================================================
82      // printing implementation
83      // ======================================================
84  
85      @Override
86      public void close() throws IOException {
87          if (out instanceof Closeable) {
88              ((Closeable) out).close();
89          }
90      }
91  
92      /**
93       * Flushes the underlying stream.
94       *
95       * @throws IOException
96       *             If an I/O error occurs
97       */
98      @Override
99      public void flush() throws IOException {
100         if (out instanceof Flushable) {
101             ((Flushable) out).flush();
102         }
103     }
104 
105     /**
106      * Gets the target Appendable.
107      *
108      * @return the target Appendable.
109      */
110     public Appendable getOut() {
111         return this.out;
112     }
113 
114     /**
115      * Prints the string as the next value on the line. The value will be escaped or encapsulated as needed.
116      *
117      * @param value
118      *            value to be output.
119      * @throws IOException
120      *             If an I/O error occurs
121      */
122     public void print(final Object value) throws IOException {
123         // null values are considered empty
124         String strValue;
125         if (value == null) {
126             final String nullString = format.getNullString();
127             strValue = nullString == null ? Constants.EMPTY : nullString;
128         } else {
129             strValue = value.toString();
130         }
131         strValue = format.getTrim() ? strValue.trim() : strValue;
132         this.print(value, strValue, 0, strValue.length());
133     }
134 
135     private void print(final Object object, final CharSequence value, final int offset, final int len)
136             throws IOException {
137         if (!newRecord) {
138             out.append(format.getDelimiter());
139         }
140         if (object == null) {
141             out.append(value);
142         } else if (format.isQuoteCharacterSet()) {
143             // the original object is needed so can check for Number
144             printAndQuote(object, value, offset, len);
145         } else if (format.isEscapeCharacterSet()) {
146             printAndEscape(value, offset, len);
147         } else {
148             out.append(value, offset, offset + len);
149         }
150         newRecord = false;
151     }
152 
153     /*
154      * Note: must only be called if escaping is enabled, otherwise will generate NPE
155      */
156     private void printAndEscape(final CharSequence value, final int offset, final int len) throws IOException {
157         int start = offset;
158         int pos = offset;
159         final int end = offset + len;
160 
161         final char delim = format.getDelimiter();
162         final char escape = format.getEscapeCharacter().charValue();
163 
164         while (pos < end) {
165             char c = value.charAt(pos);
166             if (c == CR || c == LF || c == delim || c == escape) {
167                 // write out segment up until this char
168                 if (pos > start) {
169                     out.append(value, start, pos);
170                 }
171                 if (c == LF) {
172                     c = 'n';
173                 } else if (c == CR) {
174                     c = 'r';
175                 }
176 
177                 out.append(escape);
178                 out.append(c);
179 
180                 start = pos + 1; // start on the current char after this one
181             }
182 
183             pos++;
184         }
185 
186         // write last segment
187         if (pos > start) {
188             out.append(value, start, pos);
189         }
190     }
191 
192     /*
193      * Note: must only be called if quoting is enabled, otherwise will generate NPE
194      */
195     // the original object is needed so can check for Number
196     private void printAndQuote(final Object object, final CharSequence value, final int offset, final int len)
197             throws IOException {
198         boolean quote = false;
199         int start = offset;
200         int pos = offset;
201         final int end = offset + len;
202 
203         final char delimChar = format.getDelimiter();
204         final char quoteChar = format.getQuoteCharacter().charValue();
205 
206         QuoteMode quoteModePolicy = format.getQuoteMode();
207         if (quoteModePolicy == null) {
208             quoteModePolicy = QuoteMode.MINIMAL;
209         }
210         switch (quoteModePolicy) {
211         case ALL:
212             quote = true;
213             break;
214         case NON_NUMERIC:
215             quote = !(object instanceof Number);
216             break;
217         case NONE:
218             // Use the existing escaping code
219             printAndEscape(value, offset, len);
220             return;
221         case MINIMAL:
222             if (len <= 0) {
223                 // always quote an empty token that is the first
224                 // on the line, as it may be the only thing on the
225                 // line. If it were not quoted in that case,
226                 // an empty line has no tokens.
227                 if (newRecord) {
228                     quote = true;
229                 }
230             } else {
231                 char c = value.charAt(pos);
232 
233                 // TODO where did this rule come from?
234                 if (newRecord && (c < '0' || c > '9' && c < 'A' || c > 'Z' && c < 'a' || c > 'z')) {
235                     quote = true;
236                 } else if (c <= COMMENT) {
237                     // Some other chars at the start of a value caused the parser to fail, so for now
238                     // encapsulate if we start in anything less than '#'. We are being conservative
239                     // by including the default comment char too.
240                     quote = true;
241                 } else {
242                     while (pos < end) {
243                         c = value.charAt(pos);
244                         if (c == LF || c == CR || c == quoteChar || c == delimChar) {
245                             quote = true;
246                             break;
247                         }
248                         pos++;
249                     }
250 
251                     if (!quote) {
252                         pos = end - 1;
253                         c = value.charAt(pos);
254                         // Some other chars at the end caused the parser to fail, so for now
255                         // encapsulate if we end in anything less than ' '
256                         if (c <= SP) {
257                             quote = true;
258                         }
259                     }
260                 }
261             }
262 
263             if (!quote) {
264                 // no encapsulation needed - write out the original value
265                 out.append(value, start, end);
266                 return;
267             }
268             break;
269         default:
270             throw new IllegalStateException("Unexpected Quote value: " + quoteModePolicy);
271         }
272 
273         if (!quote) {
274             // no encapsulation needed - write out the original value
275             out.append(value, start, end);
276             return;
277         }
278 
279         // we hit something that needed encapsulation
280         out.append(quoteChar);
281 
282         // Pick up where we left off: pos should be positioned on the first character that caused
283         // the need for encapsulation.
284         while (pos < end) {
285             final char c = value.charAt(pos);
286             if (c == quoteChar) {
287                 // write out the chunk up until this point
288 
289                 // add 1 to the length to write out the encapsulator also
290                 out.append(value, start, pos + 1);
291                 // put the next starting position on the encapsulator so we will
292                 // write it out again with the next string (effectively doubling it)
293                 start = pos;
294             }
295             pos++;
296         }
297 
298         // write the last segment
299         out.append(value, start, pos);
300         out.append(quoteChar);
301     }
302 
303     /**
304      * Prints a comment on a new line among the delimiter separated values.
305      *
306      * <p>
307      * Comments will always begin on a new line and occupy a least one full line. The character specified to start
308      * comments and a space will be inserted at the beginning of each new line in the comment.
309      * </p>
310      *
311      * If comments are disabled in the current CSV format this method does nothing.
312      *
313      * @param comment
314      *            the comment to output
315      * @throws IOException
316      *             If an I/O error occurs
317      */
318     public void printComment(final String comment) throws IOException {
319         if (!format.isCommentMarkerSet()) {
320             return;
321         }
322         if (!newRecord) {
323             println();
324         }
325         out.append(format.getCommentMarker().charValue());
326         out.append(SP);
327         for (int i = 0; i < comment.length(); i++) {
328             final char c = comment.charAt(i);
329             switch (c) {
330             case CR:
331                 if (i + 1 < comment.length() && comment.charAt(i + 1) == LF) {
332                     i++;
333                 }
334                 //$FALL-THROUGH$ break intentionally excluded.
335             case LF:
336                 println();
337                 out.append(format.getCommentMarker().charValue());
338                 out.append(SP);
339                 break;
340             default:
341                 out.append(c);
342                 break;
343             }
344         }
345         println();
346     }
347 
348     /**
349      * Outputs the record separator.
350      *
351      * @throws IOException
352      *             If an I/O error occurs
353      */
354     public void println() throws IOException {
355         if (format.getTrailingDelimiter()) {
356             out.append(format.getDelimiter());
357         }
358         final String recordSeparator = format.getRecordSeparator();
359         if (recordSeparator != null) {
360             out.append(recordSeparator);
361         }
362         newRecord = true;
363     }
364 
365     /**
366      * Prints the given values a single record of delimiter separated values followed by the record separator.
367      *
368      * <p>
369      * The values will be quoted if needed. Quotes and newLine characters will be escaped. This method adds the record
370      * separator to the output after printing the record, so there is no need to call {@link #println()}.
371      * </p>
372      *
373      * @param values
374      *            values to output.
375      * @throws IOException
376      *             If an I/O error occurs
377      */
378     public void printRecord(final Iterable<?> values) throws IOException {
379         for (final Object value : values) {
380             print(value);
381         }
382         println();
383     }
384 
385     /**
386      * Prints the given values a single record of delimiter separated values followed by the record separator.
387      *
388      * <p>
389      * The values will be quoted if needed. Quotes and newLine characters will be escaped. This method adds the record
390      * separator to the output after printing the record, so there is no need to call {@link #println()}.
391      * </p>
392      *
393      * @param values
394      *            values to output.
395      * @throws IOException
396      *             If an I/O error occurs
397      */
398     public void printRecord(final Object... values) throws IOException {
399         for (final Object value : values) {
400             print(value);
401         }
402         println();
403     }
404 
405     /**
406      * Prints all the objects in the given collection handling nested collections/arrays as records.
407      *
408      * <p>
409      * If the given collection only contains simple objects, this method will print a single record like
410      * {@link #printRecord(Iterable)}. If the given collections contains nested collections/arrays those nested elements
411      * will each be printed as records using {@link #printRecord(Object...)}.
412      * </p>
413      *
414      * <p>
415      * Given the following data structure:
416      * </p>
417      *
418      * <pre>
419      * <code>
420      * List&lt;String[]&gt; data = ...
421      * data.add(new String[]{ "A", "B", "C" });
422      * data.add(new String[]{ "1", "2", "3" });
423      * data.add(new String[]{ "A1", "B2", "C3" });
424      * </code>
425      * </pre>
426      *
427      * <p>
428      * Calling this method will print:
429      * </p>
430      *
431      * <pre>
432      * <code>
433      * A, B, C
434      * 1, 2, 3
435      * A1, B2, C3
436      * </code>
437      * </pre>
438      *
439      * @param values
440      *            the values to print.
441      * @throws IOException
442      *             If an I/O error occurs
443      */
444     public void printRecords(final Iterable<?> values) throws IOException {
445         for (final Object value : values) {
446             if (value instanceof Object[]) {
447                 this.printRecord((Object[]) value);
448             } else if (value instanceof Iterable) {
449                 this.printRecord((Iterable<?>) value);
450             } else {
451                 this.printRecord(value);
452             }
453         }
454     }
455 
456     /**
457      * Prints all the objects in the given array handling nested collections/arrays as records.
458      *
459      * <p>
460      * If the given array only contains simple objects, this method will print a single record like
461      * {@link #printRecord(Object...)}. If the given collections contains nested collections/arrays those nested
462      * elements will each be printed as records using {@link #printRecord(Object...)}.
463      * </p>
464      *
465      * <p>
466      * Given the following data structure:
467      * </p>
468      *
469      * <pre>
470      * <code>
471      * String[][] data = new String[3][]
472      * data[0] = String[]{ "A", "B", "C" };
473      * data[1] = new String[]{ "1", "2", "3" };
474      * data[2] = new String[]{ "A1", "B2", "C3" };
475      * </code>
476      * </pre>
477      *
478      * <p>
479      * Calling this method will print:
480      * </p>
481      *
482      * <pre>
483      * <code>
484      * A, B, C
485      * 1, 2, 3
486      * A1, B2, C3
487      * </code>
488      * </pre>
489      *
490      * @param values
491      *            the values to print.
492      * @throws IOException
493      *             If an I/O error occurs
494      */
495     public void printRecords(final Object... values) throws IOException {
496         for (final Object value : values) {
497             if (value instanceof Object[]) {
498                 this.printRecord((Object[]) value);
499             } else if (value instanceof Iterable) {
500                 this.printRecord((Iterable<?>) value);
501             } else {
502                 this.printRecord(value);
503             }
504         }
505     }
506 
507     /**
508      * Prints all the objects in the given JDBC result set.
509      *
510      * @param resultSet
511      *            result set the values to print.
512      * @throws IOException
513      *             If an I/O error occurs
514      * @throws SQLException
515      *             if a database access error occurs
516      */
517     public void printRecords(final ResultSet resultSet) throws SQLException, IOException {
518         final int columnCount = resultSet.getMetaData().getColumnCount();
519         while (resultSet.next()) {
520             for (int i = 1; i <= columnCount; i++) {
521                 print(resultSet.getObject(i));
522             }
523             println();
524         }
525     }
526 }