View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.csv;
19  
20  import static org.apache.commons.csv.Constants.COMMENT;
21  import static org.apache.commons.csv.Constants.CR;
22  import static org.apache.commons.csv.Constants.LF;
23  import static org.apache.commons.csv.Constants.SP;
24  
25  import java.io.Closeable;
26  import java.io.Flushable;
27  import java.io.IOException;
28  import java.sql.ResultSet;
29  import java.sql.SQLException;
30  
31  /**
32   * Prints values in a CSV format.
33   *
34   * @version $Id: CSVPrinter.java 1695167 2015-08-10 21:08:58Z ggregory $
35   */
36  public final class CSVPrinter implements Flushable, Closeable {
37  
38      /** The place that the values get written. */
39      private final Appendable out;
40      private final CSVFormat format;
41  
42      /** True if we just began a new record. */
43      private boolean newRecord = true;
44  
45      /**
46       * Creates a printer that will print values to the given stream following the CSVFormat.
47       * <p>
48       * Currently, only a pure encapsulation format or a pure escaping format is supported. Hybrid formats (encapsulation
49       * and escaping with a different character) are not supported.
50       * </p>
51       *
52       * @param out
53       *            stream to which to print. Must not be null.
54       * @param format
55       *            the CSV format. Must not be null.
56       * @throws IOException
57       *             thrown if the optional header cannot be printed.
58       * @throws IllegalArgumentException
59       *             thrown if the parameters of the format are inconsistent or if either out or format are null.
60       */
61      public CSVPrinter(final Appendable out, final CSVFormat format) throws IOException {
62          Assertions.notNull(out, "out");
63          Assertions.notNull(format, "format");
64  
65          this.out = out;
66          this.format = format;
67          // TODO: Is it a good idea to do this here instead of on the first call to a print method?
68          // It seems a pain to have to track whether the header has already been printed or not.
69          if (format.getHeaderComments() != null) {
70              for (final String line : format.getHeaderComments()) {
71                  if (line != null) {
72                      this.printComment(line);
73                  }
74              }
75          }
76          if (format.getHeader() != null) {
77              this.printRecord((Object[]) format.getHeader());
78          }
79      }
80  
81      // ======================================================
82      // printing implementation
83      // ======================================================
84  
85      @Override
86      public void close() throws IOException {
87          if (out instanceof Closeable) {
88              ((Closeable) out).close();
89          }
90      }
91  
92      /**
93       * Flushes the underlying stream.
94       *
95       * @throws IOException
96       *             If an I/O error occurs
97       */
98      @Override
99      public void flush() throws IOException {
100         if (out instanceof Flushable) {
101             ((Flushable) out).flush();
102         }
103     }
104 
105     /**
106      * Gets the target Appendable.
107      *
108      * @return the target Appendable.
109      */
110     public Appendable getOut() {
111         return this.out;
112     }
113 
114     /**
115      * Prints the string as the next value on the line. The value will be escaped or encapsulated as needed.
116      *
117      * @param value
118      *            value to be output.
119      * @throws IOException
120      *             If an I/O error occurs
121      */
122     public void print(final Object value) throws IOException {
123         // null values are considered empty
124         String strValue;
125         if (value == null) {
126             final String nullString = format.getNullString();
127             strValue = nullString == null ? Constants.EMPTY : nullString;
128         } else {
129             strValue = value.toString();
130         }
131         this.print(value, strValue, 0, strValue.length());
132     }
133 
134     private void print(final Object object, final CharSequence value, final int offset, final int len)
135             throws IOException {
136         if (!newRecord) {
137             out.append(format.getDelimiter());
138         }
139         if (format.isQuoteCharacterSet()) {
140             // the original object is needed so can check for Number
141             printAndQuote(object, value, offset, len);
142         } else if (format.isEscapeCharacterSet()) {
143             printAndEscape(value, offset, len);
144         } else {
145             out.append(value, offset, offset + len);
146         }
147         newRecord = false;
148     }
149 
150     /*
151      * Note: must only be called if escaping is enabled, otherwise will generate NPE
152      */
153     private void printAndEscape(final CharSequence value, final int offset, final int len) throws IOException {
154         int start = offset;
155         int pos = offset;
156         final int end = offset + len;
157 
158         final char delim = format.getDelimiter();
159         final char escape = format.getEscapeCharacter().charValue();
160 
161         while (pos < end) {
162             char c = value.charAt(pos);
163             if (c == CR || c == LF || c == delim || c == escape) {
164                 // write out segment up until this char
165                 if (pos > start) {
166                     out.append(value, start, pos);
167                 }
168                 if (c == LF) {
169                     c = 'n';
170                 } else if (c == CR) {
171                     c = 'r';
172                 }
173 
174                 out.append(escape);
175                 out.append(c);
176 
177                 start = pos + 1; // start on the current char after this one
178             }
179 
180             pos++;
181         }
182 
183         // write last segment
184         if (pos > start) {
185             out.append(value, start, pos);
186         }
187     }
188 
189     /*
190      * Note: must only be called if quoting is enabled, otherwise will generate NPE
191      */
192     // the original object is needed so can check for Number
193     private void printAndQuote(final Object object, final CharSequence value, final int offset, final int len)
194             throws IOException {
195         boolean quote = false;
196         int start = offset;
197         int pos = offset;
198         final int end = offset + len;
199 
200         final char delimChar = format.getDelimiter();
201         final char quoteChar = format.getQuoteCharacter().charValue();
202 
203         QuoteMode quoteModePolicy = format.getQuoteMode();
204         if (quoteModePolicy == null) {
205             quoteModePolicy = QuoteMode.MINIMAL;
206         }
207         switch (quoteModePolicy) {
208         case ALL:
209             quote = true;
210             break;
211         case NON_NUMERIC:
212             quote = !(object instanceof Number);
213             break;
214         case NONE:
215             // Use the existing escaping code
216             printAndEscape(value, offset, len);
217             return;
218         case MINIMAL:
219             if (len <= 0) {
220                 // always quote an empty token that is the first
221                 // on the line, as it may be the only thing on the
222                 // line. If it were not quoted in that case,
223                 // an empty line has no tokens.
224                 if (newRecord) {
225                     quote = true;
226                 }
227             } else {
228                 char c = value.charAt(pos);
229 
230                 // TODO where did this rule come from?
231                 if (newRecord && (c < '0' || (c > '9' && c < 'A') || (c > 'Z' && c < 'a') || (c > 'z'))) {
232                     quote = true;
233                 } else if (c <= COMMENT) {
234                     // Some other chars at the start of a value caused the parser to fail, so for now
235                     // encapsulate if we start in anything less than '#'. We are being conservative
236                     // by including the default comment char too.
237                     quote = true;
238                 } else {
239                     while (pos < end) {
240                         c = value.charAt(pos);
241                         if (c == LF || c == CR || c == quoteChar || c == delimChar) {
242                             quote = true;
243                             break;
244                         }
245                         pos++;
246                     }
247 
248                     if (!quote) {
249                         pos = end - 1;
250                         c = value.charAt(pos);
251                         // Some other chars at the end caused the parser to fail, so for now
252                         // encapsulate if we end in anything less than ' '
253                         if (c <= SP) {
254                             quote = true;
255                         }
256                     }
257                 }
258             }
259 
260             if (!quote) {
261                 // no encapsulation needed - write out the original value
262                 out.append(value, start, end);
263                 return;
264             }
265             break;
266         default:
267             throw new IllegalStateException("Unexpected Quote value: " + quoteModePolicy);
268         }
269 
270         if (!quote) {
271             // no encapsulation needed - write out the original value
272             out.append(value, start, end);
273             return;
274         }
275 
276         // we hit something that needed encapsulation
277         out.append(quoteChar);
278 
279         // Pick up where we left off: pos should be positioned on the first character that caused
280         // the need for encapsulation.
281         while (pos < end) {
282             final char c = value.charAt(pos);
283             if (c == quoteChar) {
284                 // write out the chunk up until this point
285 
286                 // add 1 to the length to write out the encapsulator also
287                 out.append(value, start, pos + 1);
288                 // put the next starting position on the encapsulator so we will
289                 // write it out again with the next string (effectively doubling it)
290                 start = pos;
291             }
292             pos++;
293         }
294 
295         // write the last segment
296         out.append(value, start, pos);
297         out.append(quoteChar);
298     }
299 
300     /**
301      * Prints a comment on a new line among the delimiter separated values.
302      *
303      * <p>
304      * Comments will always begin on a new line and occupy a least one full line. The character specified to start
305      * comments and a space will be inserted at the beginning of each new line in the comment.
306      * </p>
307      *
308      * If comments are disabled in the current CSV format this method does nothing.
309      *
310      * @param comment
311      *            the comment to output
312      * @throws IOException
313      *             If an I/O error occurs
314      */
315     public void printComment(final String comment) throws IOException {
316         if (!format.isCommentMarkerSet()) {
317             return;
318         }
319         if (!newRecord) {
320             println();
321         }
322         out.append(format.getCommentMarker().charValue());
323         out.append(SP);
324         for (int i = 0; i < comment.length(); i++) {
325             final char c = comment.charAt(i);
326             switch (c) {
327             case CR:
328                 if (i + 1 < comment.length() && comment.charAt(i + 1) == LF) {
329                     i++;
330                 }
331                 //$FALL-THROUGH$ break intentionally excluded.
332             case LF:
333                 println();
334                 out.append(format.getCommentMarker().charValue());
335                 out.append(SP);
336                 break;
337             default:
338                 out.append(c);
339                 break;
340             }
341         }
342         println();
343     }
344 
345     /**
346      * Outputs the record separator.
347      *
348      * @throws IOException
349      *             If an I/O error occurs
350      */
351     public void println() throws IOException {
352         final String recordSeparator = format.getRecordSeparator();
353         if (recordSeparator != null) {
354             out.append(recordSeparator);
355         }
356         newRecord = true;
357     }
358 
359     /**
360      * Prints the given values a single record of delimiter separated values followed by the record separator.
361      *
362      * <p>
363      * The values will be quoted if needed. Quotes and newLine characters will be escaped. This method adds the record
364      * separator to the output after printing the record, so there is no need to call {@link #println()}.
365      * </p>
366      *
367      * @param values
368      *            values to output.
369      * @throws IOException
370      *             If an I/O error occurs
371      */
372     public void printRecord(final Iterable<?> values) throws IOException {
373         for (final Object value : values) {
374             print(value);
375         }
376         println();
377     }
378 
379     /**
380      * Prints the given values a single record of delimiter separated values followed by the record separator.
381      *
382      * <p>
383      * The values will be quoted if needed. Quotes and newLine characters will be escaped. This method adds the record
384      * separator to the output after printing the record, so there is no need to call {@link #println()}.
385      * </p>
386      *
387      * @param values
388      *            values to output.
389      * @throws IOException
390      *             If an I/O error occurs
391      */
392     public void printRecord(final Object... values) throws IOException {
393         for (final Object value : values) {
394             print(value);
395         }
396         println();
397     }
398 
399     /**
400      * Prints all the objects in the given collection handling nested collections/arrays as records.
401      *
402      * <p>
403      * If the given collection only contains simple objects, this method will print a single record like
404      * {@link #printRecord(Iterable)}. If the given collections contains nested collections/arrays those nested elements
405      * will each be printed as records using {@link #printRecord(Object...)}.
406      * </p>
407      *
408      * <p>
409      * Given the following data structure:
410      * </p>
411      *
412      * <pre>
413      * <code>
414      * List&lt;String[]&gt; data = ...
415      * data.add(new String[]{ "A", "B", "C" });
416      * data.add(new String[]{ "1", "2", "3" });
417      * data.add(new String[]{ "A1", "B2", "C3" });
418      * </code>
419      * </pre>
420      *
421      * <p>
422      * Calling this method will print:
423      * </p>
424      *
425      * <pre>
426      * <code>
427      * A, B, C
428      * 1, 2, 3
429      * A1, B2, C3
430      * </code>
431      * </pre>
432      *
433      * @param values
434      *            the values to print.
435      * @throws IOException
436      *             If an I/O error occurs
437      */
438     public void printRecords(final Iterable<?> values) throws IOException {
439         for (final Object value : values) {
440             if (value instanceof Object[]) {
441                 this.printRecord((Object[]) value);
442             } else if (value instanceof Iterable) {
443                 this.printRecord((Iterable<?>) value);
444             } else {
445                 this.printRecord(value);
446             }
447         }
448     }
449 
450     /**
451      * Prints all the objects in the given array handling nested collections/arrays as records.
452      *
453      * <p>
454      * If the given array only contains simple objects, this method will print a single record like
455      * {@link #printRecord(Object...)}. If the given collections contains nested collections/arrays those nested
456      * elements will each be printed as records using {@link #printRecord(Object...)}.
457      * </p>
458      *
459      * <p>
460      * Given the following data structure:
461      * </p>
462      *
463      * <pre>
464      * <code>
465      * String[][] data = new String[3][]
466      * data[0] = String[]{ "A", "B", "C" };
467      * data[1] = new String[]{ "1", "2", "3" };
468      * data[2] = new String[]{ "A1", "B2", "C3" };
469      * </code>
470      * </pre>
471      *
472      * <p>
473      * Calling this method will print:
474      * </p>
475      *
476      * <pre>
477      * <code>
478      * A, B, C
479      * 1, 2, 3
480      * A1, B2, C3
481      * </code>
482      * </pre>
483      *
484      * @param values
485      *            the values to print.
486      * @throws IOException
487      *             If an I/O error occurs
488      */
489     public void printRecords(final Object... values) throws IOException {
490         for (final Object value : values) {
491             if (value instanceof Object[]) {
492                 this.printRecord((Object[]) value);
493             } else if (value instanceof Iterable) {
494                 this.printRecord((Iterable<?>) value);
495             } else {
496                 this.printRecord(value);
497             }
498         }
499     }
500 
501     /**
502      * Prints all the objects in the given JDBC result set.
503      *
504      * @param resultSet
505      *            result set the values to print.
506      * @throws IOException
507      *             If an I/O error occurs
508      * @throws SQLException
509      *             if a database access error occurs
510      */
511     public void printRecords(final ResultSet resultSet) throws SQLException, IOException {
512         final int columnCount = resultSet.getMetaData().getColumnCount();
513         while (resultSet.next()) {
514             for (int i = 1; i <= columnCount; i++) {
515                 print(resultSet.getObject(i));
516             }
517             println();
518         }
519     }
520 }