001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017 package org.apache.commons.lang;
018
019 import java.io.IOException;
020 import java.io.StringWriter;
021 import java.io.Writer;
022 import java.util.Locale;
023
024 import org.apache.commons.lang.exception.NestableRuntimeException;
025
026 /**
027 * <p>Escapes and unescapes <code>String</code>s for
028 * Java, Java Script, HTML, XML, and SQL.</p>
029 *
030 * @author Apache Software Foundation
031 * @author Apache Jakarta Turbine
032 * @author Purple Technology
033 * @author <a href="mailto:alex@purpletech.com">Alexander Day Chaffee</a>
034 * @author Antony Riley
035 * @author Helge Tesgaard
036 * @author <a href="sean@boohai.com">Sean Brown</a>
037 * @author <a href="mailto:ggregory@seagullsw.com">Gary Gregory</a>
038 * @author Phil Steitz
039 * @author Pete Gieser
040 * @since 2.0
041 * @version $Id: StringEscapeUtils.java 905636 2010-02-02 14:03:32Z niallp $
042 */
043 public class StringEscapeUtils {
044
045 private static final char CSV_DELIMITER = ',';
046 private static final char CSV_QUOTE = '"';
047 private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE);
048 private static final char[] CSV_SEARCH_CHARS = new char[] {CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF};
049
050 /**
051 * <p><code>StringEscapeUtils</code> instances should NOT be constructed in
052 * standard programming.</p>
053 *
054 * <p>Instead, the class should be used as:
055 * <pre>StringEscapeUtils.escapeJava("foo");</pre></p>
056 *
057 * <p>This constructor is public to permit tools that require a JavaBean
058 * instance to operate.</p>
059 */
060 public StringEscapeUtils() {
061 super();
062 }
063
064 // Java and JavaScript
065 //--------------------------------------------------------------------------
066 /**
067 * <p>Escapes the characters in a <code>String</code> using Java String rules.</p>
068 *
069 * <p>Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
070 *
071 * <p>So a tab becomes the characters <code>'\\'</code> and
072 * <code>'t'</code>.</p>
073 *
074 * <p>The only difference between Java strings and JavaScript strings
075 * is that in JavaScript, a single quote must be escaped.</p>
076 *
077 * <p>Example:
078 * <pre>
079 * input string: He didn't say, "Stop!"
080 * output string: He didn't say, \"Stop!\"
081 * </pre>
082 * </p>
083 *
084 * @param str String to escape values in, may be null
085 * @return String with escaped values, <code>null</code> if null string input
086 */
087 public static String escapeJava(String str) {
088 return escapeJavaStyleString(str, false, false);
089 }
090
091 /**
092 * <p>Escapes the characters in a <code>String</code> using Java String rules to
093 * a <code>Writer</code>.</p>
094 *
095 * <p>A <code>null</code> string input has no effect.</p>
096 *
097 * @see #escapeJava(java.lang.String)
098 * @param out Writer to write escaped string into
099 * @param str String to escape values in, may be null
100 * @throws IllegalArgumentException if the Writer is <code>null</code>
101 * @throws IOException if error occurs on underlying Writer
102 */
103 public static void escapeJava(Writer out, String str) throws IOException {
104 escapeJavaStyleString(out, str, false, false);
105 }
106
107 /**
108 * <p>Escapes the characters in a <code>String</code> using JavaScript String rules.</p>
109 * <p>Escapes any values it finds into their JavaScript String form.
110 * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
111 *
112 * <p>So a tab becomes the characters <code>'\\'</code> and
113 * <code>'t'</code>.</p>
114 *
115 * <p>The only difference between Java strings and JavaScript strings
116 * is that in JavaScript, a single quote must be escaped.</p>
117 *
118 * <p>Example:
119 * <pre>
120 * input string: He didn't say, "Stop!"
121 * output string: He didn\'t say, \"Stop!\"
122 * </pre>
123 * </p>
124 *
125 * @param str String to escape values in, may be null
126 * @return String with escaped values, <code>null</code> if null string input
127 */
128 public static String escapeJavaScript(String str) {
129 return escapeJavaStyleString(str, true, true);
130 }
131
132 /**
133 * <p>Escapes the characters in a <code>String</code> using JavaScript String rules
134 * to a <code>Writer</code>.</p>
135 *
136 * <p>A <code>null</code> string input has no effect.</p>
137 *
138 * @see #escapeJavaScript(java.lang.String)
139 * @param out Writer to write escaped string into
140 * @param str String to escape values in, may be null
141 * @throws IllegalArgumentException if the Writer is <code>null</code>
142 * @throws IOException if error occurs on underlying Writer
143 **/
144 public static void escapeJavaScript(Writer out, String str) throws IOException {
145 escapeJavaStyleString(out, str, true, true);
146 }
147
148 /**
149 * <p>Worker method for the {@link #escapeJavaScript(String)} method.</p>
150 *
151 * @param str String to escape values in, may be null
152 * @param escapeSingleQuotes escapes single quotes if <code>true</code>
153 * @param escapeForwardSlash TODO
154 * @return the escaped string
155 */
156 private static String escapeJavaStyleString(String str, boolean escapeSingleQuotes, boolean escapeForwardSlash) {
157 if (str == null) {
158 return null;
159 }
160 try {
161 StringWriter writer = new StringWriter(str.length() * 2);
162 escapeJavaStyleString(writer, str, escapeSingleQuotes, escapeForwardSlash);
163 return writer.toString();
164 } catch (IOException ioe) {
165 // this should never ever happen while writing to a StringWriter
166 throw new UnhandledException(ioe);
167 }
168 }
169
170 /**
171 * <p>Worker method for the {@link #escapeJavaScript(String)} method.</p>
172 *
173 * @param out write to receieve the escaped string
174 * @param str String to escape values in, may be null
175 * @param escapeSingleQuote escapes single quotes if <code>true</code>
176 * @param escapeForwardSlash TODO
177 * @throws IOException if an IOException occurs
178 */
179 private static void escapeJavaStyleString(Writer out, String str, boolean escapeSingleQuote,
180 boolean escapeForwardSlash) throws IOException {
181 if (out == null) {
182 throw new IllegalArgumentException("The Writer must not be null");
183 }
184 if (str == null) {
185 return;
186 }
187 int sz;
188 sz = str.length();
189 for (int i = 0; i < sz; i++) {
190 char ch = str.charAt(i);
191
192 // handle unicode
193 if (ch > 0xfff) {
194 out.write("\\u" + hex(ch));
195 } else if (ch > 0xff) {
196 out.write("\\u0" + hex(ch));
197 } else if (ch > 0x7f) {
198 out.write("\\u00" + hex(ch));
199 } else if (ch < 32) {
200 switch (ch) {
201 case '\b' :
202 out.write('\\');
203 out.write('b');
204 break;
205 case '\n' :
206 out.write('\\');
207 out.write('n');
208 break;
209 case '\t' :
210 out.write('\\');
211 out.write('t');
212 break;
213 case '\f' :
214 out.write('\\');
215 out.write('f');
216 break;
217 case '\r' :
218 out.write('\\');
219 out.write('r');
220 break;
221 default :
222 if (ch > 0xf) {
223 out.write("\\u00" + hex(ch));
224 } else {
225 out.write("\\u000" + hex(ch));
226 }
227 break;
228 }
229 } else {
230 switch (ch) {
231 case '\'' :
232 if (escapeSingleQuote) {
233 out.write('\\');
234 }
235 out.write('\'');
236 break;
237 case '"' :
238 out.write('\\');
239 out.write('"');
240 break;
241 case '\\' :
242 out.write('\\');
243 out.write('\\');
244 break;
245 case '/' :
246 if (escapeForwardSlash) {
247 out.write('\\');
248 }
249 out.write('/');
250 break;
251 default :
252 out.write(ch);
253 break;
254 }
255 }
256 }
257 }
258
259 /**
260 * <p>Returns an upper case hexadecimal <code>String</code> for the given
261 * character.</p>
262 *
263 * @param ch The character to convert.
264 * @return An upper case hexadecimal <code>String</code>
265 */
266 private static String hex(char ch) {
267 return Integer.toHexString(ch).toUpperCase(Locale.ENGLISH);
268 }
269
270 /**
271 * <p>Unescapes any Java literals found in the <code>String</code>.
272 * For example, it will turn a sequence of <code>'\'</code> and
273 * <code>'n'</code> into a newline character, unless the <code>'\'</code>
274 * is preceded by another <code>'\'</code>.</p>
275 *
276 * @param str the <code>String</code> to unescape, may be null
277 * @return a new unescaped <code>String</code>, <code>null</code> if null string input
278 */
279 public static String unescapeJava(String str) {
280 if (str == null) {
281 return null;
282 }
283 try {
284 StringWriter writer = new StringWriter(str.length());
285 unescapeJava(writer, str);
286 return writer.toString();
287 } catch (IOException ioe) {
288 // this should never ever happen while writing to a StringWriter
289 throw new UnhandledException(ioe);
290 }
291 }
292
293 /**
294 * <p>Unescapes any Java literals found in the <code>String</code> to a
295 * <code>Writer</code>.</p>
296 *
297 * <p>For example, it will turn a sequence of <code>'\'</code> and
298 * <code>'n'</code> into a newline character, unless the <code>'\'</code>
299 * is preceded by another <code>'\'</code>.</p>
300 *
301 * <p>A <code>null</code> string input has no effect.</p>
302 *
303 * @param out the <code>Writer</code> used to output unescaped characters
304 * @param str the <code>String</code> to unescape, may be null
305 * @throws IllegalArgumentException if the Writer is <code>null</code>
306 * @throws IOException if error occurs on underlying Writer
307 */
308 public static void unescapeJava(Writer out, String str) throws IOException {
309 if (out == null) {
310 throw new IllegalArgumentException("The Writer must not be null");
311 }
312 if (str == null) {
313 return;
314 }
315 int sz = str.length();
316 StringBuffer unicode = new StringBuffer(4);
317 boolean hadSlash = false;
318 boolean inUnicode = false;
319 for (int i = 0; i < sz; i++) {
320 char ch = str.charAt(i);
321 if (inUnicode) {
322 // if in unicode, then we're reading unicode
323 // values in somehow
324 unicode.append(ch);
325 if (unicode.length() == 4) {
326 // unicode now contains the four hex digits
327 // which represents our unicode character
328 try {
329 int value = Integer.parseInt(unicode.toString(), 16);
330 out.write((char) value);
331 unicode.setLength(0);
332 inUnicode = false;
333 hadSlash = false;
334 } catch (NumberFormatException nfe) {
335 throw new NestableRuntimeException("Unable to parse unicode value: " + unicode, nfe);
336 }
337 }
338 continue;
339 }
340 if (hadSlash) {
341 // handle an escaped value
342 hadSlash = false;
343 switch (ch) {
344 case '\\':
345 out.write('\\');
346 break;
347 case '\'':
348 out.write('\'');
349 break;
350 case '\"':
351 out.write('"');
352 break;
353 case 'r':
354 out.write('\r');
355 break;
356 case 'f':
357 out.write('\f');
358 break;
359 case 't':
360 out.write('\t');
361 break;
362 case 'n':
363 out.write('\n');
364 break;
365 case 'b':
366 out.write('\b');
367 break;
368 case 'u':
369 {
370 // uh-oh, we're in unicode country....
371 inUnicode = true;
372 break;
373 }
374 default :
375 out.write(ch);
376 break;
377 }
378 continue;
379 } else if (ch == '\\') {
380 hadSlash = true;
381 continue;
382 }
383 out.write(ch);
384 }
385 if (hadSlash) {
386 // then we're in the weird case of a \ at the end of the
387 // string, let's output it anyway.
388 out.write('\\');
389 }
390 }
391
392 /**
393 * <p>Unescapes any JavaScript literals found in the <code>String</code>.</p>
394 *
395 * <p>For example, it will turn a sequence of <code>'\'</code> and <code>'n'</code>
396 * into a newline character, unless the <code>'\'</code> is preceded by another
397 * <code>'\'</code>.</p>
398 *
399 * @see #unescapeJava(String)
400 * @param str the <code>String</code> to unescape, may be null
401 * @return A new unescaped <code>String</code>, <code>null</code> if null string input
402 */
403 public static String unescapeJavaScript(String str) {
404 return unescapeJava(str);
405 }
406
407 /**
408 * <p>Unescapes any JavaScript literals found in the <code>String</code> to a
409 * <code>Writer</code>.</p>
410 *
411 * <p>For example, it will turn a sequence of <code>'\'</code> and <code>'n'</code>
412 * into a newline character, unless the <code>'\'</code> is preceded by another
413 * <code>'\'</code>.</p>
414 *
415 * <p>A <code>null</code> string input has no effect.</p>
416 *
417 * @see #unescapeJava(Writer,String)
418 * @param out the <code>Writer</code> used to output unescaped characters
419 * @param str the <code>String</code> to unescape, may be null
420 * @throws IllegalArgumentException if the Writer is <code>null</code>
421 * @throws IOException if error occurs on underlying Writer
422 */
423 public static void unescapeJavaScript(Writer out, String str) throws IOException {
424 unescapeJava(out, str);
425 }
426
427 // HTML and XML
428 //--------------------------------------------------------------------------
429 /**
430 * <p>Escapes the characters in a <code>String</code> using HTML entities.</p>
431 *
432 * <p>
433 * For example:
434 * </p>
435 * <p><code>"bread" & "butter"</code></p>
436 * becomes:
437 * <p>
438 * <code>&quot;bread&quot; &amp; &quot;butter&quot;</code>.
439 * </p>
440 *
441 * <p>Supports all known HTML 4.0 entities, including funky accents.
442 * Note that the commonly used apostrophe escape character (&apos;)
443 * is not a legal entity and so is not supported). </p>
444 *
445 * @param str the <code>String</code> to escape, may be null
446 * @return a new escaped <code>String</code>, <code>null</code> if null string input
447 *
448 * @see #unescapeHtml(String)
449 * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a>
450 * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a>
451 * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a>
452 * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a>
453 * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a>
454 */
455 public static String escapeHtml(String str) {
456 if (str == null) {
457 return null;
458 }
459 try {
460 StringWriter writer = new StringWriter ((int)(str.length() * 1.5));
461 escapeHtml(writer, str);
462 return writer.toString();
463 } catch (IOException ioe) {
464 //should be impossible
465 throw new UnhandledException(ioe);
466 }
467 }
468
469 /**
470 * <p>Escapes the characters in a <code>String</code> using HTML entities and writes
471 * them to a <code>Writer</code>.</p>
472 *
473 * <p>
474 * For example:
475 * </p>
476 * <code>"bread" & "butter"</code>
477 * <p>becomes:</p>
478 * <code>&quot;bread&quot; &amp; &quot;butter&quot;</code>.
479 *
480 * <p>Supports all known HTML 4.0 entities, including funky accents.
481 * Note that the commonly used apostrophe escape character (&apos;)
482 * is not a legal entity and so is not supported). </p>
483 *
484 * @param writer the writer receiving the escaped string, not null
485 * @param string the <code>String</code> to escape, may be null
486 * @throws IllegalArgumentException if the writer is null
487 * @throws IOException when <code>Writer</code> passed throws the exception from
488 * calls to the {@link Writer#write(int)} methods.
489 *
490 * @see #escapeHtml(String)
491 * @see #unescapeHtml(String)
492 * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a>
493 * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a>
494 * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a>
495 * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a>
496 * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a>
497 */
498 public static void escapeHtml(Writer writer, String string) throws IOException {
499 if (writer == null ) {
500 throw new IllegalArgumentException ("The Writer must not be null.");
501 }
502 if (string == null) {
503 return;
504 }
505 Entities.HTML40.escape(writer, string);
506 }
507
508 //-----------------------------------------------------------------------
509 /**
510 * <p>Unescapes a string containing entity escapes to a string
511 * containing the actual Unicode characters corresponding to the
512 * escapes. Supports HTML 4.0 entities.</p>
513 *
514 * <p>For example, the string "&lt;Fran&ccedil;ais&gt;"
515 * will become "<Français>"</p>
516 *
517 * <p>If an entity is unrecognized, it is left alone, and inserted
518 * verbatim into the result string. e.g. "&gt;&zzzz;x" will
519 * become ">&zzzz;x".</p>
520 *
521 * @param str the <code>String</code> to unescape, may be null
522 * @return a new unescaped <code>String</code>, <code>null</code> if null string input
523 * @see #escapeHtml(Writer, String)
524 */
525 public static String unescapeHtml(String str) {
526 if (str == null) {
527 return null;
528 }
529 try {
530 StringWriter writer = new StringWriter ((int)(str.length() * 1.5));
531 unescapeHtml(writer, str);
532 return writer.toString();
533 } catch (IOException ioe) {
534 //should be impossible
535 throw new UnhandledException(ioe);
536 }
537 }
538
539 /**
540 * <p>Unescapes a string containing entity escapes to a string
541 * containing the actual Unicode characters corresponding to the
542 * escapes. Supports HTML 4.0 entities.</p>
543 *
544 * <p>For example, the string "&lt;Fran&ccedil;ais&gt;"
545 * will become "<Français>"</p>
546 *
547 * <p>If an entity is unrecognized, it is left alone, and inserted
548 * verbatim into the result string. e.g. "&gt;&zzzz;x" will
549 * become ">&zzzz;x".</p>
550 *
551 * @param writer the writer receiving the unescaped string, not null
552 * @param string the <code>String</code> to unescape, may be null
553 * @throws IllegalArgumentException if the writer is null
554 * @throws IOException if an IOException occurs
555 * @see #escapeHtml(String)
556 */
557 public static void unescapeHtml(Writer writer, String string) throws IOException {
558 if (writer == null ) {
559 throw new IllegalArgumentException ("The Writer must not be null.");
560 }
561 if (string == null) {
562 return;
563 }
564 Entities.HTML40.unescape(writer, string);
565 }
566
567 //-----------------------------------------------------------------------
568 /**
569 * <p>Escapes the characters in a <code>String</code> using XML entities.</p>
570 *
571 * <p>For example: <tt>"bread" & "butter"</tt> =>
572 * <tt>&quot;bread&quot; &amp; &quot;butter&quot;</tt>.
573 * </p>
574 *
575 * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
576 * Does not support DTDs or external entities.</p>
577 *
578 * <p>Note that unicode characters greater than 0x7f are currently escaped to
579 * their numerical \\u equivalent. This may change in future releases. </p>
580 *
581 * @param writer the writer receiving the unescaped string, not null
582 * @param str the <code>String</code> to escape, may be null
583 * @throws IllegalArgumentException if the writer is null
584 * @throws IOException if there is a problem writing
585 * @see #unescapeXml(java.lang.String)
586 */
587 public static void escapeXml(Writer writer, String str) throws IOException {
588 if (writer == null ) {
589 throw new IllegalArgumentException ("The Writer must not be null.");
590 }
591 if (str == null) {
592 return;
593 }
594 Entities.XML.escape(writer, str);
595 }
596
597 /**
598 * <p>Escapes the characters in a <code>String</code> using XML entities.</p>
599 *
600 * <p>For example: <tt>"bread" & "butter"</tt> =>
601 * <tt>&quot;bread&quot; &amp; &quot;butter&quot;</tt>.
602 * </p>
603 *
604 * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
605 * Does not support DTDs or external entities.</p>
606 *
607 * <p>Note that unicode characters greater than 0x7f are currently escaped to
608 * their numerical \\u equivalent. This may change in future releases. </p>
609 *
610 * @param str the <code>String</code> to escape, may be null
611 * @return a new escaped <code>String</code>, <code>null</code> if null string input
612 * @see #unescapeXml(java.lang.String)
613 */
614 public static String escapeXml(String str) {
615 if (str == null) {
616 return null;
617 }
618 return Entities.XML.escape(str);
619 }
620
621 //-----------------------------------------------------------------------
622 /**
623 * <p>Unescapes a string containing XML entity escapes to a string
624 * containing the actual Unicode characters corresponding to the
625 * escapes.</p>
626 *
627 * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
628 * Does not support DTDs or external entities.</p>
629 *
630 * <p>Note that numerical \\u unicode codes are unescaped to their respective
631 * unicode characters. This may change in future releases. </p>
632 *
633 * @param writer the writer receiving the unescaped string, not null
634 * @param str the <code>String</code> to unescape, may be null
635 * @throws IllegalArgumentException if the writer is null
636 * @throws IOException if there is a problem writing
637 * @see #escapeXml(String)
638 */
639 public static void unescapeXml(Writer writer, String str) throws IOException {
640 if (writer == null ) {
641 throw new IllegalArgumentException ("The Writer must not be null.");
642 }
643 if (str == null) {
644 return;
645 }
646 Entities.XML.unescape(writer, str);
647 }
648
649 /**
650 * <p>Unescapes a string containing XML entity escapes to a string
651 * containing the actual Unicode characters corresponding to the
652 * escapes.</p>
653 *
654 * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
655 * Does not support DTDs or external entities.</p>
656 *
657 * <p>Note that numerical \\u unicode codes are unescaped to their respective
658 * unicode characters. This may change in future releases. </p>
659 *
660 * @param str the <code>String</code> to unescape, may be null
661 * @return a new unescaped <code>String</code>, <code>null</code> if null string input
662 * @see #escapeXml(String)
663 */
664 public static String unescapeXml(String str) {
665 if (str == null) {
666 return null;
667 }
668 return Entities.XML.unescape(str);
669 }
670
671 //-----------------------------------------------------------------------
672 /**
673 * <p>Escapes the characters in a <code>String</code> to be suitable to pass to
674 * an SQL query.</p>
675 *
676 * <p>For example,
677 * <pre>statement.executeQuery("SELECT * FROM MOVIES WHERE TITLE='" +
678 * StringEscapeUtils.escapeSql("McHale's Navy") +
679 * "'");</pre>
680 * </p>
681 *
682 * <p>At present, this method only turns single-quotes into doubled single-quotes
683 * (<code>"McHale's Navy"</code> => <code>"McHale''s Navy"</code>). It does not
684 * handle the cases of percent (%) or underscore (_) for use in LIKE clauses.</p>
685 *
686 * see http://www.jguru.com/faq/view.jsp?EID=8881
687 * @param str the string to escape, may be null
688 * @return a new String, escaped for SQL, <code>null</code> if null string input
689 */
690 public static String escapeSql(String str) {
691 if (str == null) {
692 return null;
693 }
694 return StringUtils.replace(str, "'", "''");
695 }
696
697 //-----------------------------------------------------------------------
698
699 /**
700 * <p>Returns a <code>String</code> value for a CSV column enclosed in double quotes,
701 * if required.</p>
702 *
703 * <p>If the value contains a comma, newline or double quote, then the
704 * String value is returned enclosed in double quotes.</p>
705 * </p>
706 *
707 * <p>Any double quote characters in the value are escaped with another double quote.</p>
708 *
709 * <p>If the value does not contain a comma, newline or double quote, then the
710 * String value is returned unchanged.</p>
711 * </p>
712 *
713 * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
714 * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>.
715 *
716 * @param str the input CSV column String, may be null
717 * @return the input String, enclosed in double quotes if the value contains a comma,
718 * newline or double quote, <code>null</code> if null string input
719 * @since 2.4
720 */
721 public static String escapeCsv(String str) {
722 if (StringUtils.containsNone(str, CSV_SEARCH_CHARS)) {
723 return str;
724 }
725 try {
726 StringWriter writer = new StringWriter();
727 escapeCsv(writer, str);
728 return writer.toString();
729 } catch (IOException ioe) {
730 // this should never ever happen while writing to a StringWriter
731 throw new UnhandledException(ioe);
732 }
733 }
734
735 /**
736 * <p>Writes a <code>String</code> value for a CSV column enclosed in double quotes,
737 * if required.</p>
738 *
739 * <p>If the value contains a comma, newline or double quote, then the
740 * String value is written enclosed in double quotes.</p>
741 * </p>
742 *
743 * <p>Any double quote characters in the value are escaped with another double quote.</p>
744 *
745 * <p>If the value does not contain a comma, newline or double quote, then the
746 * String value is written unchanged (null values are ignored).</p>
747 * </p>
748 *
749 * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
750 * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>.
751 *
752 * @param str the input CSV column String, may be null
753 * @param out Writer to write input string to, enclosed in double quotes if it contains
754 * a comma, newline or double quote
755 * @throws IOException if error occurs on underlying Writer
756 * @since 2.4
757 */
758 public static void escapeCsv(Writer out, String str) throws IOException {
759 if (StringUtils.containsNone(str, CSV_SEARCH_CHARS)) {
760 if (str != null) {
761 out.write(str);
762 }
763 return;
764 }
765 out.write(CSV_QUOTE);
766 for (int i = 0; i < str.length(); i++) {
767 char c = str.charAt(i);
768 if (c == CSV_QUOTE) {
769 out.write(CSV_QUOTE); // escape double quote
770 }
771 out.write(c);
772 }
773 out.write(CSV_QUOTE);
774 }
775
776 /**
777 * <p>Returns a <code>String</code> value for an unescaped CSV column. </p>
778 *
779 * <p>If the value is enclosed in double quotes, and contains a comma, newline
780 * or double quote, then quotes are removed.
781 * </p>
782 *
783 * <p>Any double quote escaped characters (a pair of double quotes) are unescaped
784 * to just one double quote. </p>
785 *
786 * <p>If the value is not enclosed in double quotes, or is and does not contain a
787 * comma, newline or double quote, then the String value is returned unchanged.</p>
788 * </p>
789 *
790 * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
791 * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>.
792 *
793 * @param str the input CSV column String, may be null
794 * @return the input String, with enclosing double quotes removed and embedded double
795 * quotes unescaped, <code>null</code> if null string input
796 * @since 2.4
797 */
798 public static String unescapeCsv(String str) {
799 if (str == null) {
800 return null;
801 }
802 try {
803 StringWriter writer = new StringWriter();
804 unescapeCsv(writer, str);
805 return writer.toString();
806 } catch (IOException ioe) {
807 // this should never ever happen while writing to a StringWriter
808 throw new UnhandledException(ioe);
809 }
810 }
811
812 /**
813 * <p>Returns a <code>String</code> value for an unescaped CSV column. </p>
814 *
815 * <p>If the value is enclosed in double quotes, and contains a comma, newline
816 * or double quote, then quotes are removed.
817 * </p>
818 *
819 * <p>Any double quote escaped characters (a pair of double quotes) are unescaped
820 * to just one double quote. </p>
821 *
822 * <p>If the value is not enclosed in double quotes, or is and does not contain a
823 * comma, newline or double quote, then the String value is returned unchanged.</p>
824 * </p>
825 *
826 * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
827 * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>.
828 *
829 * @param str the input CSV column String, may be null
830 * @param out Writer to write the input String to, with enclosing double quotes
831 * removed and embedded double quotes unescaped, <code>null</code> if null string input
832 * @throws IOException if error occurs on underlying Writer
833 * @since 2.4
834 */
835 public static void unescapeCsv(Writer out, String str) throws IOException {
836 if (str == null) {
837 return;
838 }
839 if (str.length() < 2) {
840 out.write(str);
841 return;
842 }
843 if ( str.charAt(0) != CSV_QUOTE || str.charAt(str.length() - 1) != CSV_QUOTE ) {
844 out.write(str);
845 return;
846 }
847
848 // strip quotes
849 String quoteless = str.substring(1, str.length() - 1);
850
851 if ( StringUtils.containsAny(quoteless, CSV_SEARCH_CHARS) ) {
852 // deal with escaped quotes; ie) ""
853 str = StringUtils.replace(quoteless, CSV_QUOTE_STR + CSV_QUOTE_STR, CSV_QUOTE_STR);
854 }
855
856 out.write(str);
857 }
858
859 }