/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.lang;

import java.io.IOException;
import java.io.StringWriter;
import java.io.Writer;
import java.util.Locale;

import org.apache.commons.lang.exception.NestableRuntimeException;
import org.apache.commons.lang.text.StrBuilder;

/**
 * <p>Escapes and unescapes <code>String</code>s for
 * Java, Java Script, HTML, XML, and SQL.</p>
 *
 * <p>#ThreadSafe#</p>
 * @author Apache Software Foundation
 * @author Apache Jakarta Turbine
 * @author Purple Technology
 * @author <a href="mailto:alex@purpletech.com">Alexander Day Chaffee</a>
 * @author Antony Riley
 * @author Helge Tesgaard
 * @author <a href="sean@boohai.com">Sean Brown</a>
 * @author <a href="mailto:ggregory@seagullsw.com">Gary Gregory</a>
 * @author Phil Steitz
 * @author Pete Gieser
 * @since 2.0
 * @version $Id: StringEscapeUtils.java 1057072 2011-01-10 01:55:57Z niallp $
 */
public class StringEscapeUtils {

    private static final char CSV_DELIMITER = ',';
    private static final char CSV_QUOTE = '"';
    private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE);
    /** Characters whose presence forces a CSV value to be quoted. */
    private static final char[] CSV_SEARCH_CHARS =
        new char[] {CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF};

    /**
     * <p><code>StringEscapeUtils</code> instances should NOT be constructed in
     * standard programming.</p>
     *
     * <p>Instead, the class should be used as:
     * <pre>StringEscapeUtils.escapeJava("foo");</pre></p>
     *
     * <p>This constructor is public to permit tools that require a JavaBean
     * instance to operate.</p>
     */
    public StringEscapeUtils() {
        super();
    }

    // Java and JavaScript
    //--------------------------------------------------------------------------
    /**
     * <p>Escapes the characters in a <code>String</code> using Java String rules.</p>
     *
     * <p>Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
     *
     * <p>So a tab becomes the characters <code>'\\'</code> and
     * <code>'t'</code>.</p>
     *
     * <p>The only difference between Java strings and JavaScript strings
     * is that in JavaScript, a single quote must be escaped.</p>
     *
     * <p>Example:
     * <pre>
     * input string: He didn't say, "Stop!"
     * output string: He didn't say, \"Stop!\"
     * </pre>
     * </p>
     *
     * @param str  String to escape values in, may be null
     * @return String with escaped values, <code>null</code> if null string input
     */
    public static String escapeJava(String str) {
        return escapeJavaStyleString(str, false, false);
    }

    /**
     * <p>Escapes the characters in a <code>String</code> using Java String rules to
     * a <code>Writer</code>.</p>
     *
     * <p>A <code>null</code> string input has no effect.</p>
     *
     * @see #escapeJava(java.lang.String)
     * @param out  Writer to write escaped string into
     * @param str  String to escape values in, may be null
     * @throws IllegalArgumentException if the Writer is <code>null</code>
     * @throws IOException if error occurs on underlying Writer
     */
    public static void escapeJava(Writer out, String str) throws IOException {
        escapeJavaStyleString(out, str, false, false);
    }

    /**
     * <p>Escapes the characters in a <code>String</code> using JavaScript String rules.</p>
     * <p>Escapes any values it finds into their JavaScript String form.
     * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
     *
     * <p>So a tab becomes the characters <code>'\\'</code> and
     * <code>'t'</code>.</p>
     *
     * <p>The only difference between Java strings and JavaScript strings
     * is that in JavaScript, a single quote must be escaped.</p>
     *
     * <p>Example:
     * <pre>
     * input string: He didn't say, "Stop!"
     * output string: He didn\'t say, \"Stop!\"
     * </pre>
     * </p>
     *
     * @param str  String to escape values in, may be null
     * @return String with escaped values, <code>null</code> if null string input
     */
    public static String escapeJavaScript(String str) {
        return escapeJavaStyleString(str, true, true);
    }

    /**
     * <p>Escapes the characters in a <code>String</code> using JavaScript String rules
     * to a <code>Writer</code>.</p>
     *
     * <p>A <code>null</code> string input has no effect.</p>
     *
     * @see #escapeJavaScript(java.lang.String)
     * @param out  Writer to write escaped string into
     * @param str  String to escape values in, may be null
     * @throws IllegalArgumentException if the Writer is <code>null</code>
     * @throws IOException if error occurs on underlying Writer
     **/
    public static void escapeJavaScript(Writer out, String str) throws IOException {
        escapeJavaStyleString(out, str, true, true);
    }

    /**
     * <p>Worker method shared by {@link #escapeJava(String)} and
     * {@link #escapeJavaScript(String)}: escapes into a temporary buffer and
     * returns the result.</p>
     *
     * @param str String to escape values in, may be null
     * @param escapeSingleQuotes escapes single quotes if <code>true</code>
     * @param escapeForwardSlash escapes forward slashes (<code>'/'</code>) with a
     *  backslash if <code>true</code>
     * @return the escaped string, <code>null</code> if null string input
     */
    private static String escapeJavaStyleString(String str, boolean escapeSingleQuotes, boolean escapeForwardSlash) {
        if (str == null) {
            return null;
        }
        try {
            // escaping can only grow the text, so start with room to spare
            StringWriter writer = new StringWriter(str.length() * 2);
            escapeJavaStyleString(writer, str, escapeSingleQuotes, escapeForwardSlash);
            return writer.toString();
        } catch (IOException ioe) {
            // this should never ever happen while writing to a StringWriter
            throw new UnhandledException(ioe);
        }
    }

    /**
     * <p>Worker method shared by {@link #escapeJava(Writer, String)} and
     * {@link #escapeJavaScript(Writer, String)}.</p>
     *
     * <p>Characters above <code>0x7f</code> and control characters without a
     * short escape are written as a backslash, <code>'u'</code> and four upper
     * case hex digits. Double quotes and backslashes are always escaped.</p>
     *
     * @param out writer to receive the escaped string
     * @param str String to escape values in, may be null
     * @param escapeSingleQuote escapes single quotes if <code>true</code>
     * @param escapeForwardSlash escapes forward slashes (<code>'/'</code>) with a
     *  backslash if <code>true</code>
     * @throws IllegalArgumentException if the Writer is <code>null</code>
     * @throws IOException if an IOException occurs
     */
    private static void escapeJavaStyleString(Writer out, String str, boolean escapeSingleQuote,
            boolean escapeForwardSlash) throws IOException {
        if (out == null) {
            throw new IllegalArgumentException("The Writer must not be null");
        }
        if (str == null) {
            return;
        }
        int sz = str.length();
        for (int i = 0; i < sz; i++) {
            char ch = str.charAt(i);

            if (ch > 0x7f) {
                // anything outside 7-bit ASCII becomes a unicode escape
                writeUnicodeEscape(out, ch);
            } else if (ch < 32) {
                // control characters: use the short form where one exists
                switch (ch) {
                    case '\b' :
                        out.write('\\');
                        out.write('b');
                        break;
                    case '\n' :
                        out.write('\\');
                        out.write('n');
                        break;
                    case '\t' :
                        out.write('\\');
                        out.write('t');
                        break;
                    case '\f' :
                        out.write('\\');
                        out.write('f');
                        break;
                    case '\r' :
                        out.write('\\');
                        out.write('r');
                        break;
                    default :
                        writeUnicodeEscape(out, ch);
                        break;
                }
            } else {
                switch (ch) {
                    case '\'' :
                        if (escapeSingleQuote) {
                            out.write('\\');
                        }
                        out.write('\'');
                        break;
                    case '"' :
                        out.write('\\');
                        out.write('"');
                        break;
                    case '\\' :
                        out.write('\\');
                        out.write('\\');
                        break;
                    case '/' :
                        if (escapeForwardSlash) {
                            out.write('\\');
                        }
                        out.write('/');
                        break;
                    default :
                        out.write(ch);
                        break;
                }
            }
        }
    }

    /**
     * <p>Writes the given character as a unicode escape: a backslash, the letter
     * <code>'u'</code> and exactly four upper case hex digits (zero padded on
     * the left).</p>
     *
     * @param out writer to receive the escape, assumed not null
     * @param ch the character to write in escaped form
     * @throws IOException if an IOException occurs
     */
    private static void writeUnicodeEscape(Writer out, char ch) throws IOException {
        String digits = hex(ch);
        // a char yields 1 to 4 hex digits, so the substring supplies 3 to 0 zeros
        out.write("\\u" + "0000".substring(digits.length()) + digits);
    }

    /**
     * <p>Returns an upper case hexadecimal <code>String</code> for the given
     * character.</p>
     *
     * <p>The result is not padded; it is as long as the value requires.</p>
     *
     * @param ch The character to convert.
     * @return An upper case hexadecimal <code>String</code>
     */
    private static String hex(char ch) {
        return Integer.toHexString(ch).toUpperCase(Locale.ENGLISH);
    }

    /**
     * <p>Unescapes any Java literals found in the <code>String</code>.
     * For example, it will turn a sequence of <code>'\'</code> and
     * <code>'n'</code> into a newline character, unless the <code>'\'</code>
     * is preceded by another <code>'\'</code>.</p>
     *
     * @param str  the <code>String</code> to unescape, may be null
     * @return a new unescaped <code>String</code>, <code>null</code> if null string input
     */
    public static String unescapeJava(String str) {
        if (str == null) {
            return null;
        }
        try {
            // unescaping never makes the text longer
            StringWriter writer = new StringWriter(str.length());
            unescapeJava(writer, str);
            return writer.toString();
        } catch (IOException ioe) {
            // this should never ever happen while writing to a StringWriter
            throw new UnhandledException(ioe);
        }
    }

    /**
     * <p>Unescapes any Java literals found in the <code>String</code> to a
     * <code>Writer</code>.</p>
     *
     * <p>For example, it will turn a sequence of <code>'\'</code> and
     * <code>'n'</code> into a newline character, unless the <code>'\'</code>
     * is preceded by another <code>'\'</code>.</p>
     *
     * <p>A backslash followed by a character that is not a recognised escape
     * is dropped and the character is written as is. A single backslash at the
     * very end of the input is written out unchanged, and so is a unicode
     * escape that is cut short by the end of the input (fewer than four digits
     * after the <code>'u'</code>).</p>
     *
     * <p>A <code>null</code> string input has no effect.</p>
     *
     * @param out  the <code>Writer</code> used to output unescaped characters
     * @param str  the <code>String</code> to unescape, may be null
     * @throws IllegalArgumentException if the Writer is <code>null</code>
     * @throws IOException if error occurs on underlying Writer
     * @throws NestableRuntimeException if the four characters of a unicode
     *  escape cannot be parsed as a hexadecimal number
     */
    public static void unescapeJava(Writer out, String str) throws IOException {
        if (out == null) {
            throw new IllegalArgumentException("The Writer must not be null");
        }
        if (str == null) {
            return;
        }
        int sz = str.length();
        StrBuilder unicode = new StrBuilder(4);
        boolean hadSlash = false;
        boolean inUnicode = false;
        for (int i = 0; i < sz; i++) {
            char ch = str.charAt(i);
            if (inUnicode) {
                // collecting the four hex digits of a unicode escape
                unicode.append(ch);
                if (unicode.length() == 4) {
                    // unicode now contains the four hex digits
                    // which represents our unicode character
                    try {
                        int value = Integer.parseInt(unicode.toString(), 16);
                        out.write((char) value);
                        unicode.setLength(0);
                        inUnicode = false;
                        hadSlash = false;
                    } catch (NumberFormatException nfe) {
                        throw new NestableRuntimeException("Unable to parse unicode value: " + unicode, nfe);
                    }
                }
                continue;
            }
            if (hadSlash) {
                // handle an escaped value
                hadSlash = false;
                switch (ch) {
                    case '\\':
                        out.write('\\');
                        break;
                    case '\'':
                        out.write('\'');
                        break;
                    case '\"':
                        out.write('"');
                        break;
                    case 'r':
                        out.write('\r');
                        break;
                    case 'f':
                        out.write('\f');
                        break;
                    case 't':
                        out.write('\t');
                        break;
                    case 'n':
                        out.write('\n');
                        break;
                    case 'b':
                        out.write('\b');
                        break;
                    case 'u':
                        // start of a unicode escape; the digits follow
                        inUnicode = true;
                        break;
                    default :
                        // unknown escape: drop the backslash, keep the character
                        out.write(ch);
                        break;
                }
                continue;
            } else if (ch == '\\') {
                hadSlash = true;
                continue;
            }
            out.write(ch);
        }
        if (inUnicode) {
            // The input ended in the middle of a unicode escape. Write back
            // exactly what was consumed instead of silently losing it.
            out.write("\\u");
            out.write(unicode.toString());
        } else if (hadSlash) {
            // then we're in the weird case of a \ at the end of the
            // string, let's output it anyway.
            out.write('\\');
        }
    }

    /**
     * <p>Unescapes any JavaScript literals found in the <code>String</code>.</p>
     *
     * <p>For example, it will turn a sequence of <code>'\'</code> and <code>'n'</code>
     * into a newline character, unless the <code>'\'</code> is preceded by another
     * <code>'\'</code>.</p>
     *
     * <p>This method simply delegates to {@link #unescapeJava(String)}.</p>
     *
     * @see #unescapeJava(String)
     * @param str  the <code>String</code> to unescape, may be null
     * @return A new unescaped <code>String</code>, <code>null</code> if null string input
     */
    public static String unescapeJavaScript(String str) {
        return unescapeJava(str);
    }

    /**
     * <p>Unescapes any JavaScript literals found in the <code>String</code> to a
     * <code>Writer</code>.</p>
     *
     * <p>For example, it will turn a sequence of <code>'\'</code> and <code>'n'</code>
     * into a newline character, unless the <code>'\'</code> is preceded by another
     * <code>'\'</code>.</p>
     *
     * <p>A <code>null</code> string input has no effect.</p>
     *
     * <p>This method simply delegates to {@link #unescapeJava(Writer,String)}.</p>
     *
     * @see #unescapeJava(Writer,String)
     * @param out  the <code>Writer</code> used to output unescaped characters
     * @param str  the <code>String</code> to unescape, may be null
     * @throws IllegalArgumentException if the Writer is <code>null</code>
     * @throws IOException if error occurs on underlying Writer
     */
    public static void unescapeJavaScript(Writer out, String str) throws IOException {
        unescapeJava(out, str);
    }

    // HTML and XML
    //--------------------------------------------------------------------------
    /**
     * <p>Escapes the characters in a <code>String</code> using HTML entities.</p>
     *
     * <p>
     * For example:
     * </p>
     * <p><code>"bread" &amp; "butter"</code></p>
     * becomes:
     * <p>
     * <code>&amp;quot;bread&amp;quot; &amp;amp; &amp;quot;butter&amp;quot;</code>.
     * </p>
     *
     * <p>Supports all known HTML 4.0 entities, including funky accents.
     * Note that the commonly used apostrophe escape character (&amp;apos;)
     * is not a legal entity and so is not supported). </p>
     *
     * @param str  the <code>String</code> to escape, may be null
     * @return a new escaped <code>String</code>, <code>null</code> if null string input
     *
     * @see #unescapeHtml(String)
     * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a>
     * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a>
     * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a>
     * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a>
     * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a>
     */
    public static String escapeHtml(String str) {
        if (str == null) {
            return null;
        }
        try {
            // entities are longer than the characters they replace
            StringWriter writer = new StringWriter((int) (str.length() * 1.5));
            escapeHtml(writer, str);
            return writer.toString();
        } catch (IOException ioe) {
            //should be impossible
            throw new UnhandledException(ioe);
        }
    }

    /**
     * <p>Escapes the characters in a <code>String</code> using HTML entities and writes
     * them to a <code>Writer</code>.</p>
     *
     * <p>
     * For example:
     * </p>
     * <code>"bread" &amp; "butter"</code>
     * <p>becomes:</p>
     * <code>&amp;quot;bread&amp;quot; &amp;amp; &amp;quot;butter&amp;quot;</code>.
     *
     * <p>Supports all known HTML 4.0 entities, including funky accents.
     * Note that the commonly used apostrophe escape character (&amp;apos;)
     * is not a legal entity and so is not supported). </p>
     *
     * <p>A <code>null</code> string input has no effect.</p>
     *
     * @param writer  the writer receiving the escaped string, not null
     * @param string  the <code>String</code> to escape, may be null
     * @throws IllegalArgumentException if the writer is null
     * @throws IOException when <code>Writer</code> passed throws the exception from
     *                                       calls to the {@link Writer#write(int)} methods.
     *
     * @see #escapeHtml(String)
     * @see #unescapeHtml(String)
     * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a>
     * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a>
     * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a>
     * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a>
     * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a>
     */
    public static void escapeHtml(Writer writer, String string) throws IOException {
        if (writer == null) {
            throw new IllegalArgumentException("The Writer must not be null.");
        }
        if (string == null) {
            return;
        }
        Entities.HTML40.escape(writer, string);
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Unescapes a string containing entity escapes to a string
     * containing the actual Unicode characters corresponding to the
     * escapes. Supports HTML 4.0 entities.</p>
     *
     * <p>For example, the string "&amp;lt;Fran&amp;ccedil;ais&amp;gt;"
     * will become "&lt;Fran&ccedil;ais&gt;"</p>
     *
     * <p>If an entity is unrecognized, it is left alone, and inserted
     * verbatim into the result string. e.g. "&amp;gt;&amp;zzzz;x" will
     * become "&gt;&amp;zzzz;x".</p>
     *
     * @param str  the <code>String</code> to unescape, may be null
     * @return a new unescaped <code>String</code>, <code>null</code> if null string input
     * @see #escapeHtml(Writer, String)
     */
    public static String unescapeHtml(String str) {
        if (str == null) {
            return null;
        }
        try {
            StringWriter writer = new StringWriter((int) (str.length() * 1.5));
            unescapeHtml(writer, str);
            return writer.toString();
        } catch (IOException ioe) {
            //should be impossible
            throw new UnhandledException(ioe);
        }
    }

    /**
     * <p>Unescapes a string containing entity escapes to a string
     * containing the actual Unicode characters corresponding to the
     * escapes. Supports HTML 4.0 entities.</p>
     *
     * <p>For example, the string "&amp;lt;Fran&amp;ccedil;ais&amp;gt;"
     * will become "&lt;Fran&ccedil;ais&gt;"</p>
     *
     * <p>If an entity is unrecognized, it is left alone, and inserted
     * verbatim into the result string. e.g. "&amp;gt;&amp;zzzz;x" will
     * become "&gt;&amp;zzzz;x".</p>
     *
     * <p>A <code>null</code> string input has no effect.</p>
     *
     * @param writer  the writer receiving the unescaped string, not null
     * @param string  the <code>String</code> to unescape, may be null
     * @throws IllegalArgumentException if the writer is null
     * @throws IOException if an IOException occurs
     * @see #escapeHtml(String)
     */
    public static void unescapeHtml(Writer writer, String string) throws IOException {
        if (writer == null) {
            throw new IllegalArgumentException("The Writer must not be null.");
        }
        if (string == null) {
            return;
        }
        Entities.HTML40.unescape(writer, string);
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Escapes the characters in a <code>String</code> using XML entities.</p>
     *
     * <p>For example: <tt>"bread" &amp; "butter"</tt> =&gt;
     * <tt>&amp;quot;bread&amp;quot; &amp;amp; &amp;quot;butter&amp;quot;</tt>.
     * </p>
     *
     * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
     * Does not support DTDs or external entities.</p>
     *
     * <p>Note that unicode characters greater than 0x7f are currently escaped to
     *    their numerical \\u equivalent. This may change in future releases. </p>
     *
     * <p>A <code>null</code> string input has no effect.</p>
     *
     * @param writer  the writer receiving the escaped string, not null
     * @param str  the <code>String</code> to escape, may be null
     * @throws IllegalArgumentException if the writer is null
     * @throws IOException if there is a problem writing
     * @see #unescapeXml(java.lang.String)
     */
    public static void escapeXml(Writer writer, String str) throws IOException {
        if (writer == null) {
            throw new IllegalArgumentException("The Writer must not be null.");
        }
        if (str == null) {
            return;
        }
        Entities.XML.escape(writer, str);
    }

    /**
     * <p>Escapes the characters in a <code>String</code> using XML entities.</p>
     *
     * <p>For example: <tt>"bread" &amp; "butter"</tt> =&gt;
     * <tt>&amp;quot;bread&amp;quot; &amp;amp; &amp;quot;butter&amp;quot;</tt>.
     * </p>
     *
     * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
     * Does not support DTDs or external entities.</p>
     *
     * <p>Note that unicode characters greater than 0x7f are currently escaped to
     *    their numerical \\u equivalent. This may change in future releases. </p>
     *
     * @param str  the <code>String</code> to escape, may be null
     * @return a new escaped <code>String</code>, <code>null</code> if null string input
     * @see #unescapeXml(java.lang.String)
     */
    public static String escapeXml(String str) {
        if (str == null) {
            return null;
        }
        return Entities.XML.escape(str);
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Unescapes a string containing XML entity escapes to a string
     * containing the actual Unicode characters corresponding to the
     * escapes.</p>
     *
     * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
     * Does not support DTDs or external entities.</p>
     *
     * <p>Note that numerical \\u unicode codes are unescaped to their respective
     *    unicode characters. This may change in future releases. </p>
     *
     * <p>A <code>null</code> string input has no effect.</p>
     *
     * @param writer  the writer receiving the unescaped string, not null
     * @param str  the <code>String</code> to unescape, may be null
     * @throws IllegalArgumentException if the writer is null
     * @throws IOException if there is a problem writing
     * @see #escapeXml(String)
     */
    public static void unescapeXml(Writer writer, String str) throws IOException {
        if (writer == null) {
            throw new IllegalArgumentException("The Writer must not be null.");
        }
        if (str == null) {
            return;
        }
        Entities.XML.unescape(writer, str);
    }

    /**
     * <p>Unescapes a string containing XML entity escapes to a string
     * containing the actual Unicode characters corresponding to the
     * escapes.</p>
     *
     * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
     * Does not support DTDs or external entities.</p>
     *
     * <p>Note that numerical \\u unicode codes are unescaped to their respective
     *    unicode characters. This may change in future releases. </p>
     *
     * @param str  the <code>String</code> to unescape, may be null
     * @return a new unescaped <code>String</code>, <code>null</code> if null string input
     * @see #escapeXml(String)
     */
    public static String unescapeXml(String str) {
        if (str == null) {
            return null;
        }
        return Entities.XML.unescape(str);
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Escapes the characters in a <code>String</code> to be suitable to pass to
     * an SQL query.</p>
     *
     * <p>For example,
     * <pre>statement.executeQuery("SELECT * FROM MOVIES WHERE TITLE='" +
     *   StringEscapeUtils.escapeSql("McHale's Navy") +
     *   "'");</pre>
     * </p>
     *
     * <p>At present, this method only turns single-quotes into doubled single-quotes
     * (<code>"McHale's Navy"</code> =&gt; <code>"McHale''s Navy"</code>). It does not
     * handle the cases of percent (%) or underscore (_) for use in LIKE clauses.</p>
     *
     * <p><b>Security note:</b> doubling single quotes is the only thing this
     * method does, so it must not be relied on as a defence against SQL
     * injection. For values that come from outside the program, bind them as
     * parameters of a <code>java.sql.PreparedStatement</code> instead of
     * concatenating them into the query text.</p>
     *
     * see http://www.jguru.com/faq/view.jsp?EID=8881
     * @param str  the string to escape, may be null
     * @return a new String, escaped for SQL, <code>null</code> if null string input
     */
    public static String escapeSql(String str) {
        if (str == null) {
            return null;
        }
        return StringUtils.replace(str, "'", "''");
    }

    //-----------------------------------------------------------------------

    /**
     * <p>Returns a <code>String</code> value for a CSV column enclosed in double quotes,
     * if required.</p>
     *
     * <p>If the value contains a comma, newline or double quote, then the
     *    String value is returned enclosed in double quotes.</p>
     *
     * <p>Any double quote characters in the value are escaped with another double quote.</p>
     *
     * <p>If the value does not contain a comma, newline or double quote, then the
     *    String value is returned unchanged.</p>
     *
     * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
     * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>.
     *
     * @param str the input CSV column String, may be null
     * @return the input String, enclosed in double quotes if the value contains a comma,
     * newline or double quote, <code>null</code> if null string input
     * @since 2.4
     */
    public static String escapeCsv(String str) {
        // nothing to quote: hand back the very same reference (also covers null)
        if (StringUtils.containsNone(str, CSV_SEARCH_CHARS)) {
            return str;
        }
        try {
            StringWriter writer = new StringWriter();
            escapeCsv(writer, str);
            return writer.toString();
        } catch (IOException ioe) {
            // this should never ever happen while writing to a StringWriter
            throw new UnhandledException(ioe);
        }
    }

    /**
     * <p>Writes a <code>String</code> value for a CSV column enclosed in double quotes,
     * if required.</p>
     *
     * <p>If the value contains a comma, newline or double quote, then the
     *    String value is written enclosed in double quotes.</p>
     *
     * <p>Any double quote characters in the value are escaped with another double quote.</p>
     *
     * <p>If the value does not contain a comma, newline or double quote, then the
     *    String value is written unchanged (null values are ignored).</p>
     *
     * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
     * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>.
     *
     * @param str the input CSV column String, may be null
     * @param out Writer to write input string to, enclosed in double quotes if it contains
     * a comma, newline or double quote; this method does not validate it
     * @throws IOException if error occurs on underlying Writer
     * @since 2.4
     */
    public static void escapeCsv(Writer out, String str) throws IOException {
        if (StringUtils.containsNone(str, CSV_SEARCH_CHARS)) {
            if (str != null) {
                out.write(str);
            }
            return;
        }
        out.write(CSV_QUOTE);
        for (int i = 0; i < str.length(); i++) {
            char c = str.charAt(i);
            if (c == CSV_QUOTE) {
                out.write(CSV_QUOTE); // escape double quote
            }
            out.write(c);
        }
        out.write(CSV_QUOTE);
    }

    /**
     * <p>Returns a <code>String</code> value for an unescaped CSV column. </p>
     *
     * <p>If the value is enclosed in double quotes, and contains a comma, newline
     *    or double quote, then quotes are removed.
     * </p>
     *
     * <p>Any double quote escaped characters (a pair of double quotes) are unescaped
     *    to just one double quote. </p>
     *
     * <p>If the value is not enclosed in double quotes, or is and does not contain a
     *    comma, newline or double quote, then the String value is returned unchanged.</p>
     *
     * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
     * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>.
     *
     * @param str the input CSV column String, may be null
     * @return the input String, with enclosing double quotes removed and embedded double
     * quotes unescaped, <code>null</code> if null string input
     * @since 2.4
     */
    public static String unescapeCsv(String str) {
        if (str == null) {
            return null;
        }
        try {
            StringWriter writer = new StringWriter();
            unescapeCsv(writer, str);
            return writer.toString();
        } catch (IOException ioe) {
            // this should never ever happen while writing to a StringWriter
            throw new UnhandledException(ioe);
        }
    }

    /**
     * <p>Writes a <code>String</code> value for an unescaped CSV column to a
     * <code>Writer</code>. </p>
     *
     * <p>If the value is enclosed in double quotes, and contains a comma, newline
     *    or double quote, then quotes are removed.
     * </p>
     *
     * <p>Any double quote escaped characters (a pair of double quotes) are unescaped
     *    to just one double quote. </p>
     *
     * <p>If the value is not enclosed in double quotes, or is and does not contain a
     *    comma, newline or double quote, then the String value is written unchanged.</p>
     *
     * <p>A <code>null</code> string input has no effect.</p>
     *
     * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
     * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>.
     *
     * @param str the input CSV column String, may be null
     * @param out Writer to write the input String to, with enclosing double quotes
     * removed and embedded double quotes unescaped
     * @throws IOException if error occurs on underlying Writer
     * @since 2.4
     */
    public static void unescapeCsv(Writer out, String str) throws IOException {
        if (str == null) {
            return;
        }
        // too short to carry both an opening and a closing quote
        if (str.length() < 2) {
            out.write(str);
            return;
        }
        if (str.charAt(0) != CSV_QUOTE || str.charAt(str.length() - 1) != CSV_QUOTE) {
            out.write(str);
            return;
        }

        // strip quotes
        String quoteless = str.substring(1, str.length() - 1);

        // The quotes are only removed when they were actually needed; a value
        // that was quoted without reason is written back with its quotes.
        String result = str;
        if (StringUtils.containsAny(quoteless, CSV_SEARCH_CHARS)) {
            // deal with escaped quotes; ie) ""
            result = StringUtils.replace(quoteless, CSV_QUOTE_STR + CSV_QUOTE_STR, CSV_QUOTE_STR);
        }

        out.write(result);
    }

}