1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.commons.csv;
19
20 import java.io.Flushable;
21 import java.io.IOException;
22
23 /**
24 * Print values as a comma separated list.
25 */
26 public class CSVPrinter {
27
28 /** The place that the values get written. */
29 private final Appendable out;
30 private final CSVFormat format;
31
32 /** True if we just began a new line. */
33 private boolean newLine = true;
34
35 /**
36 * Create a printer that will print values to the given stream following the CSVFormat.
37 * <p/>
38 * Currently, only a pure encapsulation format or a pure escaping format
39 * is supported. Hybrid formats (encapsulation and escaping with a different character) are not supported.
40 *
41 * @param out stream to which to print.
42 * @param format the CSV format. If null the default format is used ({@link CSVFormat#DEFAULT})
43 * @throws IllegalArgumentException thrown if the parameters of the format are inconsistent
44 */
45 public CSVPrinter(Appendable out, CSVFormat format) {
46 this.out = out;
47 this.format = format == null ? CSVFormat.DEFAULT : format;
48
49 this.format.validate();
50 }
51
52 // ======================================================
53 // printing implementation
54 // ======================================================
55
56 /**
57 * Output a blank line
58 */
59 public void println() throws IOException {
60 out.append(format.getLineSeparator());
61 newLine = true;
62 }
63
64 /**
65 * Flush the underlying stream.
66 *
67 * @throws IOException
68 */
69 public void flush() throws IOException {
70 if (out instanceof Flushable) {
71 ((Flushable) out).flush();
72 }
73 }
74
75 /**
76 * Print a single line of comma separated values.
77 * The values will be quoted if needed. Quotes and
78 * newLine characters will be escaped.
79 *
80 * @param values values to be outputted.
81 */
82 public void println(String... values) throws IOException {
83 for (String value : values) {
84 print(value);
85 }
86 println();
87 }
88
89
90 /**
91 * Put a comment on a new line among the comma separated values. Comments
92 * will always begin on a new line and occupy a least one full line. The
93 * character specified to start comments and a space will be inserted at
94 * the beginning of each new line in the comment.
95 * <p/>
96 * If comments are disabled in the current CSV format this method does nothing.
97 *
98 * @param comment the comment to output
99 */
100 public void printComment(String comment) throws IOException {
101 if (format.isCommentingDisabled()) {
102 return;
103 }
104 if (!newLine) {
105 println();
106 }
107 out.append(format.getCommentStart());
108 out.append(' ');
109 for (int i = 0; i < comment.length(); i++) {
110 char c = comment.charAt(i);
111 switch (c) {
112 case '\r':
113 if (i + 1 < comment.length() && comment.charAt(i + 1) == '\n') {
114 i++;
115 }
116 //$FALL-THROUGH$ break intentionally excluded.
117 case '\n':
118 println();
119 out.append(format.getCommentStart());
120 out.append(' ');
121 break;
122 default:
123 out.append(c);
124 break;
125 }
126 }
127 println();
128 }
129
130
131 private void print(CharSequence value, int offset, int len) throws IOException {
132 if (format.isEncapsulating()) {
133 printAndEncapsulate(value, offset, len);
134 } else if (format.isEscaping()) {
135 printAndEscape(value, offset, len);
136 } else {
137 printSep();
138 out.append(value, offset, offset + len);
139 }
140 }
141
142 void printSep() throws IOException {
143 if (newLine) {
144 newLine = false;
145 } else {
146 out.append(format.getDelimiter());
147 }
148 }
149
150 void printAndEscape(CharSequence value, int offset, int len) throws IOException {
151 int start = offset;
152 int pos = offset;
153 int end = offset + len;
154
155 printSep();
156
157 char delim = format.getDelimiter();
158 char escape = format.getEscape();
159
160 while (pos < end) {
161 char c = value.charAt(pos);
162 if (c == '\r' || c == '\n' || c == delim || c == escape) {
163 // write out segment up until this char
164 if (pos > start) {
165 out.append(value, start, pos);
166 }
167 if (c == '\n') {
168 c = 'n';
169 } else if (c == '\r') {
170 c = 'r';
171 }
172
173 out.append(escape);
174 out.append(c);
175
176 start = pos + 1; // start on the current char after this one
177 }
178
179 pos++;
180 }
181
182 // write last segment
183 if (pos > start) {
184 out.append(value, start, pos);
185 }
186 }
187
188 void printAndEncapsulate(CharSequence value, int offset, int len) throws IOException {
189 boolean first = newLine; // is this the first value on this line?
190 boolean quote = false;
191 int start = offset;
192 int pos = offset;
193 int end = offset + len;
194
195 printSep();
196
197 char delim = format.getDelimiter();
198 char encapsulator = format.getEncapsulator();
199
200 if (len <= 0) {
201 // always quote an empty token that is the first
202 // on the line, as it may be the only thing on the
203 // line. If it were not quoted in that case,
204 // an empty line has no tokens.
205 if (first) {
206 quote = true;
207 }
208 } else {
209 char c = value.charAt(pos);
210
211 // Hmmm, where did this rule come from?
212 if (first
213 && (c < '0'
214 || (c > '9' && c < 'A')
215 || (c > 'Z' && c < 'a')
216 || (c > 'z'))) {
217 quote = true;
218 // } else if (c == ' ' || c == '\f' || c == '\t') {
219 } else if (c <= '#') {
220 // Some other chars at the start of a value caused the parser to fail, so for now
221 // encapsulate if we start in anything less than '#'. We are being conservative
222 // by including the default comment char too.
223 quote = true;
224 } else {
225 while (pos < end) {
226 c = value.charAt(pos);
227 if (c == '\n' || c == '\r' || c == encapsulator || c == delim) {
228 quote = true;
229 break;
230 }
231 pos++;
232 }
233
234 if (!quote) {
235 pos = end - 1;
236 c = value.charAt(pos);
237 // if (c == ' ' || c == '\f' || c == '\t') {
238 // Some other chars at the end caused the parser to fail, so for now
239 // encapsulate if we end in anything less than ' '
240 if (c <= ' ') {
241 quote = true;
242 }
243 }
244 }
245 }
246
247 if (!quote) {
248 // no encapsulation needed - write out the original value
249 out.append(value, start, end);
250 return;
251 }
252
253 // we hit something that needed encapsulation
254 out.append(encapsulator);
255
256 // Pick up where we left off: pos should be positioned on the first character that caused
257 // the need for encapsulation.
258 while (pos < end) {
259 char c = value.charAt(pos);
260 if (c == encapsulator) {
261 // write out the chunk up until this point
262
263 // add 1 to the length to write out the encapsulator also
264 out.append(value, start, pos + 1);
265 // put the next starting position on the encapsulator so we will
266 // write it out again with the next string (effectively doubling it)
267 start = pos;
268 }
269 pos++;
270 }
271
272 // write the last segment
273 out.append(value, start, pos);
274 out.append(encapsulator);
275 }
276
277 /**
278 * Print the string as the next value on the line. The value
279 * will be escaped or encapsulated as needed if checkForEscape==true
280 *
281 * @param value value to be outputted.
282 */
283 public void print(String value, boolean checkForEscape) throws IOException {
284 if (value == null) {
285 // null values are considered empty
286 value = "";
287 }
288
289 if (!checkForEscape) {
290 // write directly from string
291 printSep();
292 out.append(value);
293 } else {
294 print(value, 0, value.length());
295 }
296 }
297
298 /**
299 * Print the string as the next value on the line. The value
300 * will be escaped or encapsulated as needed.
301 *
302 * @param value value to be outputted.
303 */
304 public void print(String value) throws IOException {
305 print(value, true);
306 }
307 }