001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 package org.apache.commons.lang; 018 019 import java.io.IOException; 020 import java.io.StringWriter; 021 import java.io.Writer; 022 import java.util.Locale; 023 024 import org.apache.commons.lang.exception.NestableRuntimeException; 025 026 /** 027 * <p>Escapes and unescapes <code>String</code>s for 028 * Java, Java Script, HTML, XML, and SQL.</p> 029 * 030 * @author Apache Software Foundation 031 * @author Apache Jakarta Turbine 032 * @author Purple Technology 033 * @author <a href="mailto:alex@purpletech.com">Alexander Day Chaffee</a> 034 * @author Antony Riley 035 * @author Helge Tesgaard 036 * @author <a href="sean@boohai.com">Sean Brown</a> 037 * @author <a href="mailto:ggregory@seagullsw.com">Gary Gregory</a> 038 * @author Phil Steitz 039 * @author Pete Gieser 040 * @since 2.0 041 * @version $Id: StringEscapeUtils.java 905636 2010-02-02 14:03:32Z niallp $ 042 */ 043 public class StringEscapeUtils { 044 045 private static final char CSV_DELIMITER = ','; 046 private static final char CSV_QUOTE = '"'; 047 private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE); 048 private static final char[] CSV_SEARCH_CHARS = new char[] {CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF}; 049 050 /** 051 * <p><code>StringEscapeUtils</code> instances should NOT be constructed in 052 * standard programming.</p> 053 * 054 * <p>Instead, the class should be used as: 055 * <pre>StringEscapeUtils.escapeJava("foo");</pre></p> 056 * 057 * <p>This constructor is public to permit tools that require a JavaBean 058 * instance to operate.</p> 059 */ 060 public StringEscapeUtils() { 061 super(); 062 } 063 064 // Java and JavaScript 065 //-------------------------------------------------------------------------- 066 /** 067 * <p>Escapes the characters in a <code>String</code> using Java String rules.</p> 068 * 069 * <p>Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p> 070 * 071 * <p>So a tab becomes the characters <code>'\\'</code> and 072 * <code>'t'</code>.</p> 073 * 074 * <p>The only difference between Java strings and JavaScript strings 075 * is that in JavaScript, a single quote must be escaped.</p> 076 * 077 * <p>Example: 078 * <pre> 079 * input string: He didn't say, "Stop!" 080 * output string: He didn't say, \"Stop!\" 081 * </pre> 082 * </p> 083 * 084 * @param str String to escape values in, may be null 085 * @return String with escaped values, <code>null</code> if null string input 086 */ 087 public static String escapeJava(String str) { 088 return escapeJavaStyleString(str, false, false); 089 } 090 091 /** 092 * <p>Escapes the characters in a <code>String</code> using Java String rules to 093 * a <code>Writer</code>.</p> 094 * 095 * <p>A <code>null</code> string input has no effect.</p> 096 * 097 * @see #escapeJava(java.lang.String) 098 * @param out Writer to write escaped string into 099 * @param str String to escape values in, may be null 100 * @throws IllegalArgumentException if the Writer is <code>null</code> 101 * @throws IOException if error occurs on underlying Writer 102 */ 103 public static void escapeJava(Writer out, String str) throws IOException { 104 escapeJavaStyleString(out, str, false, false); 105 } 106 107 /** 108 * <p>Escapes the characters in a <code>String</code> using JavaScript String rules.</p> 109 * <p>Escapes any values it finds into their JavaScript String form. 110 * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p> 111 * 112 * <p>So a tab becomes the characters <code>'\\'</code> and 113 * <code>'t'</code>.</p> 114 * 115 * <p>The only difference between Java strings and JavaScript strings 116 * is that in JavaScript, a single quote must be escaped.</p> 117 * 118 * <p>Example: 119 * <pre> 120 * input string: He didn't say, "Stop!" 121 * output string: He didn\'t say, \"Stop!\" 122 * </pre> 123 * </p> 124 * 125 * @param str String to escape values in, may be null 126 * @return String with escaped values, <code>null</code> if null string input 127 */ 128 public static String escapeJavaScript(String str) { 129 return escapeJavaStyleString(str, true, true); 130 } 131 132 /** 133 * <p>Escapes the characters in a <code>String</code> using JavaScript String rules 134 * to a <code>Writer</code>.</p> 135 * 136 * <p>A <code>null</code> string input has no effect.</p> 137 * 138 * @see #escapeJavaScript(java.lang.String) 139 * @param out Writer to write escaped string into 140 * @param str String to escape values in, may be null 141 * @throws IllegalArgumentException if the Writer is <code>null</code> 142 * @throws IOException if error occurs on underlying Writer 143 **/ 144 public static void escapeJavaScript(Writer out, String str) throws IOException { 145 escapeJavaStyleString(out, str, true, true); 146 } 147 148 /** 149 * <p>Worker method for the {@link #escapeJavaScript(String)} method.</p> 150 * 151 * @param str String to escape values in, may be null 152 * @param escapeSingleQuotes escapes single quotes if <code>true</code> 153 * @param escapeForwardSlash TODO 154 * @return the escaped string 155 */ 156 private static String escapeJavaStyleString(String str, boolean escapeSingleQuotes, boolean escapeForwardSlash) { 157 if (str == null) { 158 return null; 159 } 160 try { 161 StringWriter writer = new StringWriter(str.length() * 2); 162 escapeJavaStyleString(writer, str, escapeSingleQuotes, escapeForwardSlash); 163 return writer.toString(); 164 } catch (IOException ioe) { 165 // this should never ever happen while writing to a StringWriter 166 throw new UnhandledException(ioe); 167 } 168 } 169 170 /** 171 * <p>Worker method for the {@link #escapeJavaScript(String)} method.</p> 172 * 173 * @param out write to receieve the escaped string 174 * @param str String to escape values in, may be null 175 * @param escapeSingleQuote escapes single quotes if <code>true</code> 176 * @param escapeForwardSlash TODO 177 * @throws IOException if an IOException occurs 178 */ 179 private static void escapeJavaStyleString(Writer out, String str, boolean escapeSingleQuote, 180 boolean escapeForwardSlash) throws IOException { 181 if (out == null) { 182 throw new IllegalArgumentException("The Writer must not be null"); 183 } 184 if (str == null) { 185 return; 186 } 187 int sz; 188 sz = str.length(); 189 for (int i = 0; i < sz; i++) { 190 char ch = str.charAt(i); 191 192 // handle unicode 193 if (ch > 0xfff) { 194 out.write("\\u" + hex(ch)); 195 } else if (ch > 0xff) { 196 out.write("\\u0" + hex(ch)); 197 } else if (ch > 0x7f) { 198 out.write("\\u00" + hex(ch)); 199 } else if (ch < 32) { 200 switch (ch) { 201 case '\b' : 202 out.write('\\'); 203 out.write('b'); 204 break; 205 case '\n' : 206 out.write('\\'); 207 out.write('n'); 208 break; 209 case '\t' : 210 out.write('\\'); 211 out.write('t'); 212 break; 213 case '\f' : 214 out.write('\\'); 215 out.write('f'); 216 break; 217 case '\r' : 218 out.write('\\'); 219 out.write('r'); 220 break; 221 default : 222 if (ch > 0xf) { 223 out.write("\\u00" + hex(ch)); 224 } else { 225 out.write("\\u000" + hex(ch)); 226 } 227 break; 228 } 229 } else { 230 switch (ch) { 231 case '\'' : 232 if (escapeSingleQuote) { 233 out.write('\\'); 234 } 235 out.write('\''); 236 break; 237 case '"' : 238 out.write('\\'); 239 out.write('"'); 240 break; 241 case '\\' : 242 out.write('\\'); 243 out.write('\\'); 244 break; 245 case '/' : 246 if (escapeForwardSlash) { 247 out.write('\\'); 248 } 249 out.write('/'); 250 break; 251 default : 252 out.write(ch); 253 break; 254 } 255 } 256 } 257 } 258 259 /** 260 * <p>Returns an upper case hexadecimal <code>String</code> for the given 261 * character.</p> 262 * 263 * @param ch The character to convert. 264 * @return An upper case hexadecimal <code>String</code> 265 */ 266 private static String hex(char ch) { 267 return Integer.toHexString(ch).toUpperCase(Locale.ENGLISH); 268 } 269 270 /** 271 * <p>Unescapes any Java literals found in the <code>String</code>. 272 * For example, it will turn a sequence of <code>'\'</code> and 273 * <code>'n'</code> into a newline character, unless the <code>'\'</code> 274 * is preceded by another <code>'\'</code>.</p> 275 * 276 * @param str the <code>String</code> to unescape, may be null 277 * @return a new unescaped <code>String</code>, <code>null</code> if null string input 278 */ 279 public static String unescapeJava(String str) { 280 if (str == null) { 281 return null; 282 } 283 try { 284 StringWriter writer = new StringWriter(str.length()); 285 unescapeJava(writer, str); 286 return writer.toString(); 287 } catch (IOException ioe) { 288 // this should never ever happen while writing to a StringWriter 289 throw new UnhandledException(ioe); 290 } 291 } 292 293 /** 294 * <p>Unescapes any Java literals found in the <code>String</code> to a 295 * <code>Writer</code>.</p> 296 * 297 * <p>For example, it will turn a sequence of <code>'\'</code> and 298 * <code>'n'</code> into a newline character, unless the <code>'\'</code> 299 * is preceded by another <code>'\'</code>.</p> 300 * 301 * <p>A <code>null</code> string input has no effect.</p> 302 * 303 * @param out the <code>Writer</code> used to output unescaped characters 304 * @param str the <code>String</code> to unescape, may be null 305 * @throws IllegalArgumentException if the Writer is <code>null</code> 306 * @throws IOException if error occurs on underlying Writer 307 */ 308 public static void unescapeJava(Writer out, String str) throws IOException { 309 if (out == null) { 310 throw new IllegalArgumentException("The Writer must not be null"); 311 } 312 if (str == null) { 313 return; 314 } 315 int sz = str.length(); 316 StringBuffer unicode = new StringBuffer(4); 317 boolean hadSlash = false; 318 boolean inUnicode = false; 319 for (int i = 0; i < sz; i++) { 320 char ch = str.charAt(i); 321 if (inUnicode) { 322 // if in unicode, then we're reading unicode 323 // values in somehow 324 unicode.append(ch); 325 if (unicode.length() == 4) { 326 // unicode now contains the four hex digits 327 // which represents our unicode character 328 try { 329 int value = Integer.parseInt(unicode.toString(), 16); 330 out.write((char) value); 331 unicode.setLength(0); 332 inUnicode = false; 333 hadSlash = false; 334 } catch (NumberFormatException nfe) { 335 throw new NestableRuntimeException("Unable to parse unicode value: " + unicode, nfe); 336 } 337 } 338 continue; 339 } 340 if (hadSlash) { 341 // handle an escaped value 342 hadSlash = false; 343 switch (ch) { 344 case '\\': 345 out.write('\\'); 346 break; 347 case '\'': 348 out.write('\''); 349 break; 350 case '\"': 351 out.write('"'); 352 break; 353 case 'r': 354 out.write('\r'); 355 break; 356 case 'f': 357 out.write('\f'); 358 break; 359 case 't': 360 out.write('\t'); 361 break; 362 case 'n': 363 out.write('\n'); 364 break; 365 case 'b': 366 out.write('\b'); 367 break; 368 case 'u': 369 { 370 // uh-oh, we're in unicode country.... 371 inUnicode = true; 372 break; 373 } 374 default : 375 out.write(ch); 376 break; 377 } 378 continue; 379 } else if (ch == '\\') { 380 hadSlash = true; 381 continue; 382 } 383 out.write(ch); 384 } 385 if (hadSlash) { 386 // then we're in the weird case of a \ at the end of the 387 // string, let's output it anyway. 388 out.write('\\'); 389 } 390 } 391 392 /** 393 * <p>Unescapes any JavaScript literals found in the <code>String</code>.</p> 394 * 395 * <p>For example, it will turn a sequence of <code>'\'</code> and <code>'n'</code> 396 * into a newline character, unless the <code>'\'</code> is preceded by another 397 * <code>'\'</code>.</p> 398 * 399 * @see #unescapeJava(String) 400 * @param str the <code>String</code> to unescape, may be null 401 * @return A new unescaped <code>String</code>, <code>null</code> if null string input 402 */ 403 public static String unescapeJavaScript(String str) { 404 return unescapeJava(str); 405 } 406 407 /** 408 * <p>Unescapes any JavaScript literals found in the <code>String</code> to a 409 * <code>Writer</code>.</p> 410 * 411 * <p>For example, it will turn a sequence of <code>'\'</code> and <code>'n'</code> 412 * into a newline character, unless the <code>'\'</code> is preceded by another 413 * <code>'\'</code>.</p> 414 * 415 * <p>A <code>null</code> string input has no effect.</p> 416 * 417 * @see #unescapeJava(Writer,String) 418 * @param out the <code>Writer</code> used to output unescaped characters 419 * @param str the <code>String</code> to unescape, may be null 420 * @throws IllegalArgumentException if the Writer is <code>null</code> 421 * @throws IOException if error occurs on underlying Writer 422 */ 423 public static void unescapeJavaScript(Writer out, String str) throws IOException { 424 unescapeJava(out, str); 425 } 426 427 // HTML and XML 428 //-------------------------------------------------------------------------- 429 /** 430 * <p>Escapes the characters in a <code>String</code> using HTML entities.</p> 431 * 432 * <p> 433 * For example: 434 * </p> 435 * <p><code>"bread" & "butter"</code></p> 436 * becomes: 437 * <p> 438 * <code>&quot;bread&quot; &amp; &quot;butter&quot;</code>. 439 * </p> 440 * 441 * <p>Supports all known HTML 4.0 entities, including funky accents. 442 * Note that the commonly used apostrophe escape character (&apos;) 443 * is not a legal entity and so is not supported). </p> 444 * 445 * @param str the <code>String</code> to escape, may be null 446 * @return a new escaped <code>String</code>, <code>null</code> if null string input 447 * 448 * @see #unescapeHtml(String) 449 * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a> 450 * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a> 451 * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a> 452 * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a> 453 * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a> 454 */ 455 public static String escapeHtml(String str) { 456 if (str == null) { 457 return null; 458 } 459 try { 460 StringWriter writer = new StringWriter ((int)(str.length() * 1.5)); 461 escapeHtml(writer, str); 462 return writer.toString(); 463 } catch (IOException ioe) { 464 //should be impossible 465 throw new UnhandledException(ioe); 466 } 467 } 468 469 /** 470 * <p>Escapes the characters in a <code>String</code> using HTML entities and writes 471 * them to a <code>Writer</code>.</p> 472 * 473 * <p> 474 * For example: 475 * </p> 476 * <code>"bread" & "butter"</code> 477 * <p>becomes:</p> 478 * <code>&quot;bread&quot; &amp; &quot;butter&quot;</code>. 479 * 480 * <p>Supports all known HTML 4.0 entities, including funky accents. 481 * Note that the commonly used apostrophe escape character (&apos;) 482 * is not a legal entity and so is not supported). </p> 483 * 484 * @param writer the writer receiving the escaped string, not null 485 * @param string the <code>String</code> to escape, may be null 486 * @throws IllegalArgumentException if the writer is null 487 * @throws IOException when <code>Writer</code> passed throws the exception from 488 * calls to the {@link Writer#write(int)} methods. 489 * 490 * @see #escapeHtml(String) 491 * @see #unescapeHtml(String) 492 * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a> 493 * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a> 494 * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a> 495 * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a> 496 * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a> 497 */ 498 public static void escapeHtml(Writer writer, String string) throws IOException { 499 if (writer == null ) { 500 throw new IllegalArgumentException ("The Writer must not be null."); 501 } 502 if (string == null) { 503 return; 504 } 505 Entities.HTML40.escape(writer, string); 506 } 507 508 //----------------------------------------------------------------------- 509 /** 510 * <p>Unescapes a string containing entity escapes to a string 511 * containing the actual Unicode characters corresponding to the 512 * escapes. Supports HTML 4.0 entities.</p> 513 * 514 * <p>For example, the string "&lt;Fran&ccedil;ais&gt;" 515 * will become "<Français>"</p> 516 * 517 * <p>If an entity is unrecognized, it is left alone, and inserted 518 * verbatim into the result string. e.g. "&gt;&zzzz;x" will 519 * become ">&zzzz;x".</p> 520 * 521 * @param str the <code>String</code> to unescape, may be null 522 * @return a new unescaped <code>String</code>, <code>null</code> if null string input 523 * @see #escapeHtml(Writer, String) 524 */ 525 public static String unescapeHtml(String str) { 526 if (str == null) { 527 return null; 528 } 529 try { 530 StringWriter writer = new StringWriter ((int)(str.length() * 1.5)); 531 unescapeHtml(writer, str); 532 return writer.toString(); 533 } catch (IOException ioe) { 534 //should be impossible 535 throw new UnhandledException(ioe); 536 } 537 } 538 539 /** 540 * <p>Unescapes a string containing entity escapes to a string 541 * containing the actual Unicode characters corresponding to the 542 * escapes. Supports HTML 4.0 entities.</p> 543 * 544 * <p>For example, the string "&lt;Fran&ccedil;ais&gt;" 545 * will become "<Français>"</p> 546 * 547 * <p>If an entity is unrecognized, it is left alone, and inserted 548 * verbatim into the result string. e.g. "&gt;&zzzz;x" will 549 * become ">&zzzz;x".</p> 550 * 551 * @param writer the writer receiving the unescaped string, not null 552 * @param string the <code>String</code> to unescape, may be null 553 * @throws IllegalArgumentException if the writer is null 554 * @throws IOException if an IOException occurs 555 * @see #escapeHtml(String) 556 */ 557 public static void unescapeHtml(Writer writer, String string) throws IOException { 558 if (writer == null ) { 559 throw new IllegalArgumentException ("The Writer must not be null."); 560 } 561 if (string == null) { 562 return; 563 } 564 Entities.HTML40.unescape(writer, string); 565 } 566 567 //----------------------------------------------------------------------- 568 /** 569 * <p>Escapes the characters in a <code>String</code> using XML entities.</p> 570 * 571 * <p>For example: <tt>"bread" & "butter"</tt> => 572 * <tt>&quot;bread&quot; &amp; &quot;butter&quot;</tt>. 573 * </p> 574 * 575 * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos). 576 * Does not support DTDs or external entities.</p> 577 * 578 * <p>Note that unicode characters greater than 0x7f are currently escaped to 579 * their numerical \\u equivalent. This may change in future releases. </p> 580 * 581 * @param writer the writer receiving the unescaped string, not null 582 * @param str the <code>String</code> to escape, may be null 583 * @throws IllegalArgumentException if the writer is null 584 * @throws IOException if there is a problem writing 585 * @see #unescapeXml(java.lang.String) 586 */ 587 public static void escapeXml(Writer writer, String str) throws IOException { 588 if (writer == null ) { 589 throw new IllegalArgumentException ("The Writer must not be null."); 590 } 591 if (str == null) { 592 return; 593 } 594 Entities.XML.escape(writer, str); 595 } 596 597 /** 598 * <p>Escapes the characters in a <code>String</code> using XML entities.</p> 599 * 600 * <p>For example: <tt>"bread" & "butter"</tt> => 601 * <tt>&quot;bread&quot; &amp; &quot;butter&quot;</tt>. 602 * </p> 603 * 604 * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos). 605 * Does not support DTDs or external entities.</p> 606 * 607 * <p>Note that unicode characters greater than 0x7f are currently escaped to 608 * their numerical \\u equivalent. This may change in future releases. </p> 609 * 610 * @param str the <code>String</code> to escape, may be null 611 * @return a new escaped <code>String</code>, <code>null</code> if null string input 612 * @see #unescapeXml(java.lang.String) 613 */ 614 public static String escapeXml(String str) { 615 if (str == null) { 616 return null; 617 } 618 return Entities.XML.escape(str); 619 } 620 621 //----------------------------------------------------------------------- 622 /** 623 * <p>Unescapes a string containing XML entity escapes to a string 624 * containing the actual Unicode characters corresponding to the 625 * escapes.</p> 626 * 627 * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos). 628 * Does not support DTDs or external entities.</p> 629 * 630 * <p>Note that numerical \\u unicode codes are unescaped to their respective 631 * unicode characters. This may change in future releases. </p> 632 * 633 * @param writer the writer receiving the unescaped string, not null 634 * @param str the <code>String</code> to unescape, may be null 635 * @throws IllegalArgumentException if the writer is null 636 * @throws IOException if there is a problem writing 637 * @see #escapeXml(String) 638 */ 639 public static void unescapeXml(Writer writer, String str) throws IOException { 640 if (writer == null ) { 641 throw new IllegalArgumentException ("The Writer must not be null."); 642 } 643 if (str == null) { 644 return; 645 } 646 Entities.XML.unescape(writer, str); 647 } 648 649 /** 650 * <p>Unescapes a string containing XML entity escapes to a string 651 * containing the actual Unicode characters corresponding to the 652 * escapes.</p> 653 * 654 * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos). 655 * Does not support DTDs or external entities.</p> 656 * 657 * <p>Note that numerical \\u unicode codes are unescaped to their respective 658 * unicode characters. This may change in future releases. </p> 659 * 660 * @param str the <code>String</code> to unescape, may be null 661 * @return a new unescaped <code>String</code>, <code>null</code> if null string input 662 * @see #escapeXml(String) 663 */ 664 public static String unescapeXml(String str) { 665 if (str == null) { 666 return null; 667 } 668 return Entities.XML.unescape(str); 669 } 670 671 //----------------------------------------------------------------------- 672 /** 673 * <p>Escapes the characters in a <code>String</code> to be suitable to pass to 674 * an SQL query.</p> 675 * 676 * <p>For example, 677 * <pre>statement.executeQuery("SELECT * FROM MOVIES WHERE TITLE='" + 678 * StringEscapeUtils.escapeSql("McHale's Navy") + 679 * "'");</pre> 680 * </p> 681 * 682 * <p>At present, this method only turns single-quotes into doubled single-quotes 683 * (<code>"McHale's Navy"</code> => <code>"McHale''s Navy"</code>). It does not 684 * handle the cases of percent (%) or underscore (_) for use in LIKE clauses.</p> 685 * 686 * see http://www.jguru.com/faq/view.jsp?EID=8881 687 * @param str the string to escape, may be null 688 * @return a new String, escaped for SQL, <code>null</code> if null string input 689 */ 690 public static String escapeSql(String str) { 691 if (str == null) { 692 return null; 693 } 694 return StringUtils.replace(str, "'", "''"); 695 } 696 697 //----------------------------------------------------------------------- 698 699 /** 700 * <p>Returns a <code>String</code> value for a CSV column enclosed in double quotes, 701 * if required.</p> 702 * 703 * <p>If the value contains a comma, newline or double quote, then the 704 * String value is returned enclosed in double quotes.</p> 705 * </p> 706 * 707 * <p>Any double quote characters in the value are escaped with another double quote.</p> 708 * 709 * <p>If the value does not contain a comma, newline or double quote, then the 710 * String value is returned unchanged.</p> 711 * </p> 712 * 713 * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and 714 * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>. 715 * 716 * @param str the input CSV column String, may be null 717 * @return the input String, enclosed in double quotes if the value contains a comma, 718 * newline or double quote, <code>null</code> if null string input 719 * @since 2.4 720 */ 721 public static String escapeCsv(String str) { 722 if (StringUtils.containsNone(str, CSV_SEARCH_CHARS)) { 723 return str; 724 } 725 try { 726 StringWriter writer = new StringWriter(); 727 escapeCsv(writer, str); 728 return writer.toString(); 729 } catch (IOException ioe) { 730 // this should never ever happen while writing to a StringWriter 731 throw new UnhandledException(ioe); 732 } 733 } 734 735 /** 736 * <p>Writes a <code>String</code> value for a CSV column enclosed in double quotes, 737 * if required.</p> 738 * 739 * <p>If the value contains a comma, newline or double quote, then the 740 * String value is written enclosed in double quotes.</p> 741 * </p> 742 * 743 * <p>Any double quote characters in the value are escaped with another double quote.</p> 744 * 745 * <p>If the value does not contain a comma, newline or double quote, then the 746 * String value is written unchanged (null values are ignored).</p> 747 * </p> 748 * 749 * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and 750 * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>. 751 * 752 * @param str the input CSV column String, may be null 753 * @param out Writer to write input string to, enclosed in double quotes if it contains 754 * a comma, newline or double quote 755 * @throws IOException if error occurs on underlying Writer 756 * @since 2.4 757 */ 758 public static void escapeCsv(Writer out, String str) throws IOException { 759 if (StringUtils.containsNone(str, CSV_SEARCH_CHARS)) { 760 if (str != null) { 761 out.write(str); 762 } 763 return; 764 } 765 out.write(CSV_QUOTE); 766 for (int i = 0; i < str.length(); i++) { 767 char c = str.charAt(i); 768 if (c == CSV_QUOTE) { 769 out.write(CSV_QUOTE); // escape double quote 770 } 771 out.write(c); 772 } 773 out.write(CSV_QUOTE); 774 } 775 776 /** 777 * <p>Returns a <code>String</code> value for an unescaped CSV column. </p> 778 * 779 * <p>If the value is enclosed in double quotes, and contains a comma, newline 780 * or double quote, then quotes are removed. 781 * </p> 782 * 783 * <p>Any double quote escaped characters (a pair of double quotes) are unescaped 784 * to just one double quote. </p> 785 * 786 * <p>If the value is not enclosed in double quotes, or is and does not contain a 787 * comma, newline or double quote, then the String value is returned unchanged.</p> 788 * </p> 789 * 790 * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and 791 * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>. 792 * 793 * @param str the input CSV column String, may be null 794 * @return the input String, with enclosing double quotes removed and embedded double 795 * quotes unescaped, <code>null</code> if null string input 796 * @since 2.4 797 */ 798 public static String unescapeCsv(String str) { 799 if (str == null) { 800 return null; 801 } 802 try { 803 StringWriter writer = new StringWriter(); 804 unescapeCsv(writer, str); 805 return writer.toString(); 806 } catch (IOException ioe) { 807 // this should never ever happen while writing to a StringWriter 808 throw new UnhandledException(ioe); 809 } 810 } 811 812 /** 813 * <p>Returns a <code>String</code> value for an unescaped CSV column. </p> 814 * 815 * <p>If the value is enclosed in double quotes, and contains a comma, newline 816 * or double quote, then quotes are removed. 817 * </p> 818 * 819 * <p>Any double quote escaped characters (a pair of double quotes) are unescaped 820 * to just one double quote. </p> 821 * 822 * <p>If the value is not enclosed in double quotes, or is and does not contain a 823 * comma, newline or double quote, then the String value is returned unchanged.</p> 824 * </p> 825 * 826 * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and 827 * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>. 828 * 829 * @param str the input CSV column String, may be null 830 * @param out Writer to write the input String to, with enclosing double quotes 831 * removed and embedded double quotes unescaped, <code>null</code> if null string input 832 * @throws IOException if error occurs on underlying Writer 833 * @since 2.4 834 */ 835 public static void unescapeCsv(Writer out, String str) throws IOException { 836 if (str == null) { 837 return; 838 } 839 if (str.length() < 2) { 840 out.write(str); 841 return; 842 } 843 if ( str.charAt(0) != CSV_QUOTE || str.charAt(str.length() - 1) != CSV_QUOTE ) { 844 out.write(str); 845 return; 846 } 847 848 // strip quotes 849 String quoteless = str.substring(1, str.length() - 1); 850 851 if ( StringUtils.containsAny(quoteless, CSV_SEARCH_CHARS) ) { 852 // deal with escaped quotes; ie) "" 853 str = StringUtils.replace(quoteless, CSV_QUOTE_STR + CSV_QUOTE_STR, CSV_QUOTE_STR); 854 } 855 856 out.write(str); 857 } 858 859 }