View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    * 
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   * 
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.csv;
19  
20  import java.io.Flushable;
21  import java.io.IOException;
22  
23  /**
24   * Print values as a comma separated list.
25   */
26  public class CSVPrinter {
27  
28      /** The place that the values get written. */
29      private final Appendable out;
30      private final CSVFormat format;
31  
32      /** True if we just began a new line. */
33      private boolean newLine = true;
34  
35      /**
36       * Create a printer that will print values to the given stream following the CSVFormat.
37       * <p/>
38       * Currently, only a pure encapsulation format or a pure escaping format
39       * is supported. Hybrid formats (encapsulation and escaping with a different character) are not supported.
40       *
41       * @param out    stream to which to print.
42       * @param format the CSV format. If null the default format is used ({@link CSVFormat#DEFAULT})
43       * @throws IllegalArgumentException thrown if the parameters of the format are inconsistent
44       */
45      public CSVPrinter(Appendable out, CSVFormat format) {
46          this.out = out;
47          this.format = format == null ? CSVFormat.DEFAULT : format;
48          
49          this.format.validate();
50      }
51  
52      // ======================================================
53      //  printing implementation
54      // ======================================================
55  
56      /**
57       * Output a blank line
58       */
59      public void println() throws IOException {
60          out.append(format.getLineSeparator());
61          newLine = true;
62      }
63  
64      /**
65       * Flush the underlying stream.
66       * 
67       * @throws IOException
68       */
69      public void flush() throws IOException {
70          if (out instanceof Flushable) {
71              ((Flushable) out).flush();
72          }
73      }
74  
75      /**
76       * Print a single line of comma separated values.
77       * The values will be quoted if needed.  Quotes and
78       * newLine characters will be escaped.
79       *
80       * @param values values to be outputted.
81       */
82      public void println(String... values) throws IOException {
83          for (String value : values) {
84              print(value);
85          }
86          println();
87      }
88  
89  
90      /**
91       * Put a comment on a new line among the comma separated values. Comments
92       * will always begin on a new line and occupy a least one full line. The
93       * character specified to start comments and a space will be inserted at
94       * the beginning of each new line in the comment.
95       * <p/>
96       * If comments are disabled in the current CSV format this method does nothing.
97       *
98       * @param comment the comment to output
99       */
100     public void printComment(String comment) throws IOException {
101         if (format.isCommentingDisabled()) {
102             return;
103         }
104         if (!newLine) {
105             println();
106         }
107         out.append(format.getCommentStart());
108         out.append(' ');
109         for (int i = 0; i < comment.length(); i++) {
110             char c = comment.charAt(i);
111             switch (c) {
112                 case '\r':
113                     if (i + 1 < comment.length() && comment.charAt(i + 1) == '\n') {
114                         i++;
115                     }
116                 //$FALL-THROUGH$ break intentionally excluded.
117             case '\n':
118                     println();
119                     out.append(format.getCommentStart());
120                     out.append(' ');
121                     break;
122                 default:
123                     out.append(c);
124                     break;
125             }
126         }
127         println();
128     }
129 
130 
131     private void print(CharSequence value, int offset, int len) throws IOException {        
132         if (format.isEncapsulating()) {
133             printAndEncapsulate(value, offset, len);
134         } else if (format.isEscaping()) {
135             printAndEscape(value, offset, len);
136         } else {
137             printSep();
138             out.append(value, offset, offset + len);
139         }
140     }
141 
142     void printSep() throws IOException {
143         if (newLine) {
144             newLine = false;
145         } else {
146             out.append(format.getDelimiter());
147         }
148     }
149 
150     void printAndEscape(CharSequence value, int offset, int len) throws IOException {
151         int start = offset;
152         int pos = offset;
153         int end = offset + len;
154 
155         printSep();
156 
157         char delim = format.getDelimiter();
158         char escape = format.getEscape();
159 
160         while (pos < end) {
161             char c = value.charAt(pos);
162             if (c == '\r' || c == '\n' || c == delim || c == escape) {
163                 // write out segment up until this char
164                 if (pos > start) {
165                     out.append(value, start, pos);
166                 }
167                 if (c == '\n') {
168                     c = 'n';
169                 } else if (c == '\r') {
170                     c = 'r';
171                 }
172 
173                 out.append(escape);
174                 out.append(c);
175 
176                 start = pos + 1; // start on the current char after this one
177             }
178 
179             pos++;
180         }
181 
182         // write last segment
183         if (pos > start) {
184             out.append(value, start, pos);
185         }
186     }
187 
188     void printAndEncapsulate(CharSequence value, int offset, int len) throws IOException {
189         boolean first = newLine;  // is this the first value on this line?
190         boolean quote = false;
191         int start = offset;
192         int pos = offset;
193         int end = offset + len;
194 
195         printSep();
196 
197         char delim = format.getDelimiter();
198         char encapsulator = format.getEncapsulator();
199 
200         if (len <= 0) {
201             // always quote an empty token that is the first
202             // on the line, as it may be the only thing on the
203             // line. If it were not quoted in that case,
204             // an empty line has no tokens.
205             if (first) {
206                 quote = true;
207             }
208         } else {
209             char c = value.charAt(pos);
210 
211             // Hmmm, where did this rule come from?
212             if (first
213                     && (c < '0'
214                     || (c > '9' && c < 'A')
215                     || (c > 'Z' && c < 'a')
216                     || (c > 'z'))) {
217                 quote = true;
218                 // } else if (c == ' ' || c == '\f' || c == '\t') {
219             } else if (c <= '#') {
220                 // Some other chars at the start of a value caused the parser to fail, so for now
221                 // encapsulate if we start in anything less than '#'.  We are being conservative
222                 // by including the default comment char too.
223                 quote = true;
224             } else {
225                 while (pos < end) {
226                     c = value.charAt(pos);
227                     if (c == '\n' || c == '\r' || c == encapsulator || c == delim) {
228                         quote = true;
229                         break;
230                     }
231                     pos++;
232                 }
233 
234                 if (!quote) {
235                     pos = end - 1;
236                     c = value.charAt(pos);
237                     // if (c == ' ' || c == '\f' || c == '\t') {
238                     // Some other chars at the end caused the parser to fail, so for now
239                     // encapsulate if we end in anything less than ' '
240                     if (c <= ' ') {
241                         quote = true;
242                     }
243                 }
244             }
245         }
246 
247         if (!quote) {
248             // no encapsulation needed - write out the original value
249             out.append(value, start, end);
250             return;
251         }
252 
253         // we hit something that needed encapsulation
254         out.append(encapsulator);
255 
256         // Pick up where we left off: pos should be positioned on the first character that caused
257         // the need for encapsulation.
258         while (pos < end) {
259             char c = value.charAt(pos);
260             if (c == encapsulator) {
261                 // write out the chunk up until this point
262 
263                 // add 1 to the length to write out the encapsulator also
264                 out.append(value, start, pos + 1);
265                 // put the next starting position on the encapsulator so we will
266                 // write it out again with the next string (effectively doubling it)
267                 start = pos;
268             }
269             pos++;
270         }
271 
272         // write the last segment
273         out.append(value, start, pos);
274         out.append(encapsulator);
275     }
276 
277     /**
278      * Print the string as the next value on the line. The value
279      * will be escaped or encapsulated as needed if checkForEscape==true
280      *
281      * @param value value to be outputted.
282      */
283     public void print(String value, boolean checkForEscape) throws IOException {
284         if (value == null) {
285             // null values are considered empty
286             value = "";
287         }
288         
289         if (!checkForEscape) {
290             // write directly from string
291             printSep();
292             out.append(value);
293         } else {
294             print(value, 0, value.length());
295         }
296     }
297 
298     /**
299      * Print the string as the next value on the line. The value
300      * will be escaped or encapsulated as needed.
301      *
302      * @param value value to be outputted.
303      */
304     public void print(String value) throws IOException {
305         print(value, true);
306     }
307 }