001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.lang3; 018 019import java.io.IOException; 020import java.io.Writer; 021 022import org.apache.commons.lang3.text.translate.AggregateTranslator; 023import org.apache.commons.lang3.text.translate.CharSequenceTranslator; 024import org.apache.commons.lang3.text.translate.EntityArrays; 025import org.apache.commons.lang3.text.translate.JavaUnicodeEscaper; 026import org.apache.commons.lang3.text.translate.LookupTranslator; 027import org.apache.commons.lang3.text.translate.NumericEntityEscaper; 028import org.apache.commons.lang3.text.translate.NumericEntityUnescaper; 029import org.apache.commons.lang3.text.translate.OctalUnescaper; 030import org.apache.commons.lang3.text.translate.UnicodeUnescaper; 031import org.apache.commons.lang3.text.translate.UnicodeUnpairedSurrogateRemover; 032 033/** 034 * <p>Escapes and unescapes {@code String}s for 035 * Java, Java Script, HTML and XML.</p> 036 * 037 * <p>#ThreadSafe#</p> 038 * @since 2.0 039 */ 040public class StringEscapeUtils { 041 042 /* ESCAPE TRANSLATORS */ 043 044 /** 045 * Translator object for escaping Java. 046 * 047 * While {@link #escapeJava(String)} is the expected method of use, this 048 * object allows the Java escaping functionality to be used 049 * as the foundation for a custom translator. 050 * 051 * @since 3.0 052 */ 053 public static final CharSequenceTranslator ESCAPE_JAVA = 054 new LookupTranslator( 055 new String[][] { 056 {"\"", "\\\""}, 057 {"\\", "\\\\"}, 058 }).with( 059 new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()) 060 ).with( 061 JavaUnicodeEscaper.outsideOf(32, 0x7f) 062 ); 063 064 /** 065 * Translator object for escaping EcmaScript/JavaScript. 066 * 067 * While {@link #escapeEcmaScript(String)} is the expected method of use, this 068 * object allows the EcmaScript escaping functionality to be used 069 * as the foundation for a custom translator. 070 * 071 * @since 3.0 072 */ 073 public static final CharSequenceTranslator ESCAPE_ECMASCRIPT = 074 new AggregateTranslator( 075 new LookupTranslator( 076 new String[][] { 077 {"'", "\\'"}, 078 {"\"", "\\\""}, 079 {"\\", "\\\\"}, 080 {"/", "\\/"} 081 }), 082 new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()), 083 JavaUnicodeEscaper.outsideOf(32, 0x7f) 084 ); 085 086 /** 087 * Translator object for escaping Json. 088 * 089 * While {@link #escapeJson(String)} is the expected method of use, this 090 * object allows the Json escaping functionality to be used 091 * as the foundation for a custom translator. 092 * 093 * @since 3.2 094 */ 095 public static final CharSequenceTranslator ESCAPE_JSON = 096 new AggregateTranslator( 097 new LookupTranslator( 098 new String[][] { 099 {"\"", "\\\""}, 100 {"\\", "\\\\"}, 101 {"/", "\\/"} 102 }), 103 new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()), 104 JavaUnicodeEscaper.outsideOf(32, 0x7f) 105 ); 106 107 /** 108 * Translator object for escaping XML. 109 * 110 * While {@link #escapeXml(String)} is the expected method of use, this 111 * object allows the XML escaping functionality to be used 112 * as the foundation for a custom translator. 113 * 114 * @since 3.0 115 * @deprecated use {@link #ESCAPE_XML10} or {@link #ESCAPE_XML11} instead. 116 */ 117 @Deprecated 118 public static final CharSequenceTranslator ESCAPE_XML = 119 new AggregateTranslator( 120 new LookupTranslator(EntityArrays.BASIC_ESCAPE()), 121 new LookupTranslator(EntityArrays.APOS_ESCAPE()) 122 ); 123 124 /** 125 * Translator object for escaping XML 1.0. 126 * 127 * While {@link #escapeXml10(String)} is the expected method of use, this 128 * object allows the XML escaping functionality to be used 129 * as the foundation for a custom translator. 130 * 131 * @since 3.3 132 */ 133 public static final CharSequenceTranslator ESCAPE_XML10 = 134 new AggregateTranslator( 135 new LookupTranslator(EntityArrays.BASIC_ESCAPE()), 136 new LookupTranslator(EntityArrays.APOS_ESCAPE()), 137 new LookupTranslator( 138 new String[][] { 139 { "\u0000", StringUtils.EMPTY }, 140 { "\u0001", StringUtils.EMPTY }, 141 { "\u0002", StringUtils.EMPTY }, 142 { "\u0003", StringUtils.EMPTY }, 143 { "\u0004", StringUtils.EMPTY }, 144 { "\u0005", StringUtils.EMPTY }, 145 { "\u0006", StringUtils.EMPTY }, 146 { "\u0007", StringUtils.EMPTY }, 147 { "\u0008", StringUtils.EMPTY }, 148 { "\u000b", StringUtils.EMPTY }, 149 { "\u000c", StringUtils.EMPTY }, 150 { "\u000e", StringUtils.EMPTY }, 151 { "\u000f", StringUtils.EMPTY }, 152 { "\u0010", StringUtils.EMPTY }, 153 { "\u0011", StringUtils.EMPTY }, 154 { "\u0012", StringUtils.EMPTY }, 155 { "\u0013", StringUtils.EMPTY }, 156 { "\u0014", StringUtils.EMPTY }, 157 { "\u0015", StringUtils.EMPTY }, 158 { "\u0016", StringUtils.EMPTY }, 159 { "\u0017", StringUtils.EMPTY }, 160 { "\u0018", StringUtils.EMPTY }, 161 { "\u0019", StringUtils.EMPTY }, 162 { "\u001a", StringUtils.EMPTY }, 163 { "\u001b", StringUtils.EMPTY }, 164 { "\u001c", StringUtils.EMPTY }, 165 { "\u001d", StringUtils.EMPTY }, 166 { "\u001e", StringUtils.EMPTY }, 167 { "\u001f", StringUtils.EMPTY }, 168 { "\ufffe", StringUtils.EMPTY }, 169 { "\uffff", StringUtils.EMPTY } 170 }), 171 NumericEntityEscaper.between(0x7f, 0x84), 172 NumericEntityEscaper.between(0x86, 0x9f), 173 new UnicodeUnpairedSurrogateRemover() 174 ); 175 176 /** 177 * Translator object for escaping XML 1.1. 178 * 179 * While {@link #escapeXml11(String)} is the expected method of use, this 180 * object allows the XML escaping functionality to be used 181 * as the foundation for a custom translator. 182 * 183 * @since 3.3 184 */ 185 public static final CharSequenceTranslator ESCAPE_XML11 = 186 new AggregateTranslator( 187 new LookupTranslator(EntityArrays.BASIC_ESCAPE()), 188 new LookupTranslator(EntityArrays.APOS_ESCAPE()), 189 new LookupTranslator( 190 new String[][] { 191 { "\u0000", StringUtils.EMPTY }, 192 { "\u000b", "" }, 193 { "\u000c", "" }, 194 { "\ufffe", StringUtils.EMPTY }, 195 { "\uffff", StringUtils.EMPTY } 196 }), 197 NumericEntityEscaper.between(0x1, 0x8), 198 NumericEntityEscaper.between(0xe, 0x1f), 199 NumericEntityEscaper.between(0x7f, 0x84), 200 NumericEntityEscaper.between(0x86, 0x9f), 201 new UnicodeUnpairedSurrogateRemover() 202 ); 203 204 /** 205 * Translator object for escaping HTML version 3.0. 206 * 207 * While {@link #escapeHtml3(String)} is the expected method of use, this 208 * object allows the HTML escaping functionality to be used 209 * as the foundation for a custom translator. 210 * 211 * @since 3.0 212 */ 213 public static final CharSequenceTranslator ESCAPE_HTML3 = 214 new AggregateTranslator( 215 new LookupTranslator(EntityArrays.BASIC_ESCAPE()), 216 new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE()) 217 ); 218 219 /** 220 * Translator object for escaping HTML version 4.0. 221 * 222 * While {@link #escapeHtml4(String)} is the expected method of use, this 223 * object allows the HTML escaping functionality to be used 224 * as the foundation for a custom translator. 225 * 226 * @since 3.0 227 */ 228 public static final CharSequenceTranslator ESCAPE_HTML4 = 229 new AggregateTranslator( 230 new LookupTranslator(EntityArrays.BASIC_ESCAPE()), 231 new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE()), 232 new LookupTranslator(EntityArrays.HTML40_EXTENDED_ESCAPE()) 233 ); 234 235 /** 236 * Translator object for escaping individual Comma Separated Values. 237 * 238 * While {@link #escapeCsv(String)} is the expected method of use, this 239 * object allows the CSV escaping functionality to be used 240 * as the foundation for a custom translator. 241 * 242 * @since 3.0 243 */ 244 public static final CharSequenceTranslator ESCAPE_CSV = new CsvEscaper(); 245 246 // TODO: Create a parent class - 'SinglePassTranslator' ? 247 // It would handle the index checking + length returning, 248 // and could also have an optimization check method. 249 static class CsvEscaper extends CharSequenceTranslator { 250 251 private static final char CSV_DELIMITER = ','; 252 private static final char CSV_QUOTE = '"'; 253 private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE); 254 private static final char[] CSV_SEARCH_CHARS = 255 new char[] {CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF}; 256 257 @Override 258 public int translate(final CharSequence input, final int index, final Writer out) throws IOException { 259 260 if(index != 0) { 261 throw new IllegalStateException("CsvEscaper should never reach the [1] index"); 262 } 263 264 if (StringUtils.containsNone(input.toString(), CSV_SEARCH_CHARS)) { 265 out.write(input.toString()); 266 } else { 267 out.write(CSV_QUOTE); 268 out.write(StringUtils.replace(input.toString(), CSV_QUOTE_STR, CSV_QUOTE_STR + CSV_QUOTE_STR)); 269 out.write(CSV_QUOTE); 270 } 271 return Character.codePointCount(input, 0, input.length()); 272 } 273 } 274 275 /* UNESCAPE TRANSLATORS */ 276 277 /** 278 * Translator object for unescaping escaped Java. 279 * 280 * While {@link #unescapeJava(String)} is the expected method of use, this 281 * object allows the Java unescaping functionality to be used 282 * as the foundation for a custom translator. 283 * 284 * @since 3.0 285 */ 286 // TODO: throw "illegal character: \92" as an Exception if a \ on the end of the Java (as per the compiler)? 287 public static final CharSequenceTranslator UNESCAPE_JAVA = 288 new AggregateTranslator( 289 new OctalUnescaper(), // .between('\1', '\377'), 290 new UnicodeUnescaper(), 291 new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_UNESCAPE()), 292 new LookupTranslator( 293 new String[][] { 294 {"\\\\", "\\"}, 295 {"\\\"", "\""}, 296 {"\\'", "'"}, 297 {"\\", ""} 298 }) 299 ); 300 301 /** 302 * Translator object for unescaping escaped EcmaScript. 303 * 304 * While {@link #unescapeEcmaScript(String)} is the expected method of use, this 305 * object allows the EcmaScript unescaping functionality to be used 306 * as the foundation for a custom translator. 307 * 308 * @since 3.0 309 */ 310 public static final CharSequenceTranslator UNESCAPE_ECMASCRIPT = UNESCAPE_JAVA; 311 312 /** 313 * Translator object for unescaping escaped Json. 314 * 315 * While {@link #unescapeJson(String)} is the expected method of use, this 316 * object allows the Json unescaping functionality to be used 317 * as the foundation for a custom translator. 318 * 319 * @since 3.2 320 */ 321 public static final CharSequenceTranslator UNESCAPE_JSON = UNESCAPE_JAVA; 322 323 /** 324 * Translator object for unescaping escaped HTML 3.0. 325 * 326 * While {@link #unescapeHtml3(String)} is the expected method of use, this 327 * object allows the HTML unescaping functionality to be used 328 * as the foundation for a custom translator. 329 * 330 * @since 3.0 331 */ 332 public static final CharSequenceTranslator UNESCAPE_HTML3 = 333 new AggregateTranslator( 334 new LookupTranslator(EntityArrays.BASIC_UNESCAPE()), 335 new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()), 336 new NumericEntityUnescaper() 337 ); 338 339 /** 340 * Translator object for unescaping escaped HTML 4.0. 341 * 342 * While {@link #unescapeHtml4(String)} is the expected method of use, this 343 * object allows the HTML unescaping functionality to be used 344 * as the foundation for a custom translator. 345 * 346 * @since 3.0 347 */ 348 public static final CharSequenceTranslator UNESCAPE_HTML4 = 349 new AggregateTranslator( 350 new LookupTranslator(EntityArrays.BASIC_UNESCAPE()), 351 new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()), 352 new LookupTranslator(EntityArrays.HTML40_EXTENDED_UNESCAPE()), 353 new NumericEntityUnescaper() 354 ); 355 356 /** 357 * Translator object for unescaping escaped XML. 358 * 359 * While {@link #unescapeXml(String)} is the expected method of use, this 360 * object allows the XML unescaping functionality to be used 361 * as the foundation for a custom translator. 362 * 363 * @since 3.0 364 */ 365 public static final CharSequenceTranslator UNESCAPE_XML = 366 new AggregateTranslator( 367 new LookupTranslator(EntityArrays.BASIC_UNESCAPE()), 368 new LookupTranslator(EntityArrays.APOS_UNESCAPE()), 369 new NumericEntityUnescaper() 370 ); 371 372 /** 373 * Translator object for unescaping escaped Comma Separated Value entries. 374 * 375 * While {@link #unescapeCsv(String)} is the expected method of use, this 376 * object allows the CSV unescaping functionality to be used 377 * as the foundation for a custom translator. 378 * 379 * @since 3.0 380 */ 381 public static final CharSequenceTranslator UNESCAPE_CSV = new CsvUnescaper(); 382 383 static class CsvUnescaper extends CharSequenceTranslator { 384 385 private static final char CSV_DELIMITER = ','; 386 private static final char CSV_QUOTE = '"'; 387 private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE); 388 private static final char[] CSV_SEARCH_CHARS = 389 new char[] {CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF}; 390 391 @Override 392 public int translate(final CharSequence input, final int index, final Writer out) throws IOException { 393 394 if(index != 0) { 395 throw new IllegalStateException("CsvUnescaper should never reach the [1] index"); 396 } 397 398 if ( input.charAt(0) != CSV_QUOTE || input.charAt(input.length() - 1) != CSV_QUOTE ) { 399 out.write(input.toString()); 400 return Character.codePointCount(input, 0, input.length()); 401 } 402 403 // strip quotes 404 final String quoteless = input.subSequence(1, input.length() - 1).toString(); 405 406 if ( StringUtils.containsAny(quoteless, CSV_SEARCH_CHARS) ) { 407 // deal with escaped quotes; ie) "" 408 out.write(StringUtils.replace(quoteless, CSV_QUOTE_STR + CSV_QUOTE_STR, CSV_QUOTE_STR)); 409 } else { 410 out.write(input.toString()); 411 } 412 return Character.codePointCount(input, 0, input.length()); 413 } 414 } 415 416 /* Helper functions */ 417 418 /** 419 * <p>{@code StringEscapeUtils} instances should NOT be constructed in 420 * standard programming.</p> 421 * 422 * <p>Instead, the class should be used as:</p> 423 * <pre>StringEscapeUtils.escapeJava("foo");</pre> 424 * 425 * <p>This constructor is public to permit tools that require a JavaBean 426 * instance to operate.</p> 427 */ 428 public StringEscapeUtils() { 429 super(); 430 } 431 432 // Java and JavaScript 433 //-------------------------------------------------------------------------- 434 /** 435 * <p>Escapes the characters in a {@code String} using Java String rules.</p> 436 * 437 * <p>Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p> 438 * 439 * <p>So a tab becomes the characters {@code '\\'} and 440 * {@code 't'}.</p> 441 * 442 * <p>The only difference between Java strings and JavaScript strings 443 * is that in JavaScript, a single quote and forward-slash (/) are escaped.</p> 444 * 445 * <p>Example:</p> 446 * <pre> 447 * input string: He didn't say, "Stop!" 448 * output string: He didn't say, \"Stop!\" 449 * </pre> 450 * 451 * @param input String to escape values in, may be null 452 * @return String with escaped values, {@code null} if null string input 453 */ 454 public static final String escapeJava(final String input) { 455 return ESCAPE_JAVA.translate(input); 456 } 457 458 /** 459 * <p>Escapes the characters in a {@code String} using EcmaScript String rules.</p> 460 * <p>Escapes any values it finds into their EcmaScript String form. 461 * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p> 462 * 463 * <p>So a tab becomes the characters {@code '\\'} and 464 * {@code 't'}.</p> 465 * 466 * <p>The only difference between Java strings and EcmaScript strings 467 * is that in EcmaScript, a single quote and forward-slash (/) are escaped.</p> 468 * 469 * <p>Note that EcmaScript is best known by the JavaScript and ActionScript dialects. </p> 470 * 471 * <p>Example:</p> 472 * <pre> 473 * input string: He didn't say, "Stop!" 474 * output string: He didn\'t say, \"Stop!\" 475 * </pre> 476 * 477 * @param input String to escape values in, may be null 478 * @return String with escaped values, {@code null} if null string input 479 * 480 * @since 3.0 481 */ 482 public static final String escapeEcmaScript(final String input) { 483 return ESCAPE_ECMASCRIPT.translate(input); 484 } 485 486 /** 487 * <p>Escapes the characters in a {@code String} using Json String rules.</p> 488 * <p>Escapes any values it finds into their Json String form. 489 * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p> 490 * 491 * <p>So a tab becomes the characters {@code '\\'} and 492 * {@code 't'}.</p> 493 * 494 * <p>The only difference between Java strings and Json strings 495 * is that in Json, forward-slash (/) is escaped.</p> 496 * 497 * <p>See http://www.ietf.org/rfc/rfc4627.txt for further details. </p> 498 * 499 * <p>Example:</p> 500 * <pre> 501 * input string: He didn't say, "Stop!" 502 * output string: He didn't say, \"Stop!\" 503 * </pre> 504 * 505 * @param input String to escape values in, may be null 506 * @return String with escaped values, {@code null} if null string input 507 * 508 * @since 3.2 509 */ 510 public static final String escapeJson(final String input) { 511 return ESCAPE_JSON.translate(input); 512 } 513 514 /** 515 * <p>Unescapes any Java literals found in the {@code String}. 516 * For example, it will turn a sequence of {@code '\'} and 517 * {@code 'n'} into a newline character, unless the {@code '\'} 518 * is preceded by another {@code '\'}.</p> 519 * 520 * @param input the {@code String} to unescape, may be null 521 * @return a new unescaped {@code String}, {@code null} if null string input 522 */ 523 public static final String unescapeJava(final String input) { 524 return UNESCAPE_JAVA.translate(input); 525 } 526 527 /** 528 * <p>Unescapes any EcmaScript literals found in the {@code String}.</p> 529 * 530 * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'} 531 * into a newline character, unless the {@code '\'} is preceded by another 532 * {@code '\'}.</p> 533 * 534 * @see #unescapeJava(String) 535 * @param input the {@code String} to unescape, may be null 536 * @return A new unescaped {@code String}, {@code null} if null string input 537 * 538 * @since 3.0 539 */ 540 public static final String unescapeEcmaScript(final String input) { 541 return UNESCAPE_ECMASCRIPT.translate(input); 542 } 543 544 /** 545 * <p>Unescapes any Json literals found in the {@code String}.</p> 546 * 547 * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'} 548 * into a newline character, unless the {@code '\'} is preceded by another 549 * {@code '\'}.</p> 550 * 551 * @see #unescapeJava(String) 552 * @param input the {@code String} to unescape, may be null 553 * @return A new unescaped {@code String}, {@code null} if null string input 554 * 555 * @since 3.2 556 */ 557 public static final String unescapeJson(final String input) { 558 return UNESCAPE_JSON.translate(input); 559 } 560 561 // HTML and XML 562 //-------------------------------------------------------------------------- 563 /** 564 * <p>Escapes the characters in a {@code String} using HTML entities.</p> 565 * 566 * <p> 567 * For example: 568 * </p> 569 * <p><code>"bread" & "butter"</code></p> 570 * becomes: 571 * <p> 572 * <code>&quot;bread&quot; &amp; &quot;butter&quot;</code>. 573 * </p> 574 * 575 * <p>Supports all known HTML 4.0 entities, including funky accents. 576 * Note that the commonly used apostrophe escape character (&apos;) 577 * is not a legal entity and so is not supported). </p> 578 * 579 * @param input the {@code String} to escape, may be null 580 * @return a new escaped {@code String}, {@code null} if null string input 581 * 582 * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a> 583 * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a> 584 * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a> 585 * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a> 586 * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a> 587 * 588 * @since 3.0 589 */ 590 public static final String escapeHtml4(final String input) { 591 return ESCAPE_HTML4.translate(input); 592 } 593 594 /** 595 * <p>Escapes the characters in a {@code String} using HTML entities.</p> 596 * <p>Supports only the HTML 3.0 entities. </p> 597 * 598 * @param input the {@code String} to escape, may be null 599 * @return a new escaped {@code String}, {@code null} if null string input 600 * 601 * @since 3.0 602 */ 603 public static final String escapeHtml3(final String input) { 604 return ESCAPE_HTML3.translate(input); 605 } 606 607 //----------------------------------------------------------------------- 608 /** 609 * <p>Unescapes a string containing entity escapes to a string 610 * containing the actual Unicode characters corresponding to the 611 * escapes. Supports HTML 4.0 entities.</p> 612 * 613 * <p>For example, the string {@code "<Français>"} 614 * will become {@code "<Français>"}</p> 615 * 616 * <p>If an entity is unrecognized, it is left alone, and inserted 617 * verbatim into the result string. e.g. {@code ">&zzzz;x"} will 618 * become {@code ">&zzzz;x"}.</p> 619 * 620 * @param input the {@code String} to unescape, may be null 621 * @return a new unescaped {@code String}, {@code null} if null string input 622 * 623 * @since 3.0 624 */ 625 public static final String unescapeHtml4(final String input) { 626 return UNESCAPE_HTML4.translate(input); 627 } 628 629 /** 630 * <p>Unescapes a string containing entity escapes to a string 631 * containing the actual Unicode characters corresponding to the 632 * escapes. Supports only HTML 3.0 entities.</p> 633 * 634 * @param input the {@code String} to unescape, may be null 635 * @return a new unescaped {@code String}, {@code null} if null string input 636 * 637 * @since 3.0 638 */ 639 public static final String unescapeHtml3(final String input) { 640 return UNESCAPE_HTML3.translate(input); 641 } 642 643 //----------------------------------------------------------------------- 644 /** 645 * <p>Escapes the characters in a {@code String} using XML entities.</p> 646 * 647 * <p>For example: {@code "bread" & "butter"} => 648 * {@code "bread" & "butter"}. 649 * </p> 650 * 651 * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos). 652 * Does not support DTDs or external entities.</p> 653 * 654 * <p>Note that Unicode characters greater than 0x7f are as of 3.0, no longer 655 * escaped. If you still wish this functionality, you can achieve it 656 * via the following: 657 * {@code StringEscapeUtils.ESCAPE_XML.with( NumericEntityEscaper.between(0x7f, Integer.MAX_VALUE) );}</p> 658 * 659 * @param input the {@code String} to escape, may be null 660 * @return a new escaped {@code String}, {@code null} if null string input 661 * @see #unescapeXml(java.lang.String) 662 * @deprecated use {@link #escapeXml10(java.lang.String)} or {@link #escapeXml11(java.lang.String)} instead. 663 */ 664 @Deprecated 665 public static final String escapeXml(final String input) { 666 return ESCAPE_XML.translate(input); 667 } 668 669 /** 670 * <p>Escapes the characters in a {@code String} using XML entities.</p> 671 * 672 * <p>For example: {@code "bread" & "butter"} => 673 * {@code "bread" & "butter"}. 674 * </p> 675 * 676 * <p>Note that XML 1.0 is a text-only format: it cannot represent control 677 * characters or unpaired Unicode surrogate codepoints, even after escaping. 678 * {@code escapeXml10} will remove characters that do not fit in the 679 * following ranges:</p> 680 * 681 * <p>{@code #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p> 682 * 683 * <p>Though not strictly necessary, {@code escapeXml10} will escape 684 * characters in the following ranges:</p> 685 * 686 * <p>{@code [#x7F-#x84] | [#x86-#x9F]}</p> 687 * 688 * <p>The returned string can be inserted into a valid XML 1.0 or XML 1.1 689 * document. If you want to allow more non-text characters in an XML 1.1 690 * document, use {@link #escapeXml11(String)}.</p> 691 * 692 * @param input the {@code String} to escape, may be null 693 * @return a new escaped {@code String}, {@code null} if null string input 694 * @see #unescapeXml(java.lang.String) 695 * @since 3.3 696 */ 697 public static String escapeXml10(final String input) { 698 return ESCAPE_XML10.translate(input); 699 } 700 701 /** 702 * <p>Escapes the characters in a {@code String} using XML entities.</p> 703 * 704 * <p>For example: {@code "bread" & "butter"} => 705 * {@code "bread" & "butter"}. 706 * </p> 707 * 708 * <p>XML 1.1 can represent certain control characters, but it cannot represent 709 * the null byte or unpaired Unicode surrogate codepoints, even after escaping. 710 * {@code escapeXml11} will remove characters that do not fit in the following 711 * ranges:</p> 712 * 713 * <p>{@code [#x1-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p> 714 * 715 * <p>{@code escapeXml11} will escape characters in the following ranges:</p> 716 * 717 * <p>{@code [#x1-#x8] | [#xB-#xC] | [#xE-#x1F] | [#x7F-#x84] | [#x86-#x9F]}</p> 718 * 719 * <p>The returned string can be inserted into a valid XML 1.1 document. Do not 720 * use it for XML 1.0 documents.</p> 721 * 722 * @param input the {@code String} to escape, may be null 723 * @return a new escaped {@code String}, {@code null} if null string input 724 * @see #unescapeXml(java.lang.String) 725 * @since 3.3 726 */ 727 public static String escapeXml11(final String input) { 728 return ESCAPE_XML11.translate(input); 729 } 730 731 //----------------------------------------------------------------------- 732 /** 733 * <p>Unescapes a string containing XML entity escapes to a string 734 * containing the actual Unicode characters corresponding to the 735 * escapes.</p> 736 * 737 * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos). 738 * Does not support DTDs or external entities.</p> 739 * 740 * <p>Note that numerical \\u Unicode codes are unescaped to their respective 741 * Unicode characters. This may change in future releases. </p> 742 * 743 * @param input the {@code String} to unescape, may be null 744 * @return a new unescaped {@code String}, {@code null} if null string input 745 * @see #escapeXml(String) 746 * @see #escapeXml10(String) 747 * @see #escapeXml11(String) 748 */ 749 public static final String unescapeXml(final String input) { 750 return UNESCAPE_XML.translate(input); 751 } 752 753 //----------------------------------------------------------------------- 754 755 /** 756 * <p>Returns a {@code String} value for a CSV column enclosed in double quotes, 757 * if required.</p> 758 * 759 * <p>If the value contains a comma, newline or double quote, then the 760 * String value is returned enclosed in double quotes.</p> 761 * 762 * <p>Any double quote characters in the value are escaped with another double quote.</p> 763 * 764 * <p>If the value does not contain a comma, newline or double quote, then the 765 * String value is returned unchanged.</p> 766 * 767 * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and 768 * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>. 769 * 770 * @param input the input CSV column String, may be null 771 * @return the input String, enclosed in double quotes if the value contains a comma, 772 * newline or double quote, {@code null} if null string input 773 * @since 2.4 774 */ 775 public static final String escapeCsv(final String input) { 776 return ESCAPE_CSV.translate(input); 777 } 778 779 /** 780 * <p>Returns a {@code String} value for an unescaped CSV column. </p> 781 * 782 * <p>If the value is enclosed in double quotes, and contains a comma, newline 783 * or double quote, then quotes are removed. 784 * </p> 785 * 786 * <p>Any double quote escaped characters (a pair of double quotes) are unescaped 787 * to just one double quote. </p> 788 * 789 * <p>If the value is not enclosed in double quotes, or is and does not contain a 790 * comma, newline or double quote, then the String value is returned unchanged.</p> 791 * 792 * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and 793 * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>. 794 * 795 * @param input the input CSV column String, may be null 796 * @return the input String, with enclosing double quotes removed and embedded double 797 * quotes unescaped, {@code null} if null string input 798 * @since 2.4 799 */ 800 public static final String unescapeCsv(final String input) { 801 return UNESCAPE_CSV.translate(input); 802 } 803 804}