001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.lang3; 018 019import java.io.IOException; 020import java.io.Writer; 021 022import org.apache.commons.lang3.text.translate.AggregateTranslator; 023import org.apache.commons.lang3.text.translate.CharSequenceTranslator; 024import org.apache.commons.lang3.text.translate.EntityArrays; 025import org.apache.commons.lang3.text.translate.JavaUnicodeEscaper; 026import org.apache.commons.lang3.text.translate.LookupTranslator; 027import org.apache.commons.lang3.text.translate.NumericEntityEscaper; 028import org.apache.commons.lang3.text.translate.NumericEntityUnescaper; 029import org.apache.commons.lang3.text.translate.OctalUnescaper; 030import org.apache.commons.lang3.text.translate.UnicodeUnescaper; 031import org.apache.commons.lang3.text.translate.UnicodeUnpairedSurrogateRemover; 032 033/** 034 * <p>Escapes and unescapes {@code String}s for 035 * Java, Java Script, HTML and XML.</p> 036 * 037 * <p>#ThreadSafe#</p> 038 * @since 2.0 039 * @deprecated as of 3.6, use commons-text 040 * <a href="https://commons.apache.org/proper/commons-text/javadocs/api-release/org/apache/commons/text/StringEscapeUtils.html"> 041 * StringEscapeUtils</a> instead 042 */ 043@Deprecated 044public class StringEscapeUtils { 045 046 /* ESCAPE TRANSLATORS */ 047 048 /** 049 * Translator object for escaping Java. 050 * 051 * While {@link #escapeJava(String)} is the expected method of use, this 052 * object allows the Java escaping functionality to be used 053 * as the foundation for a custom translator. 054 * 055 * @since 3.0 056 */ 057 public static final CharSequenceTranslator ESCAPE_JAVA = 058 new LookupTranslator( 059 new String[][] { 060 {"\"", "\\\""}, 061 {"\\", "\\\\"}, 062 }).with( 063 new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()) 064 ).with( 065 JavaUnicodeEscaper.outsideOf(32, 0x7f) 066 ); 067 068 /** 069 * Translator object for escaping EcmaScript/JavaScript. 070 * 071 * While {@link #escapeEcmaScript(String)} is the expected method of use, this 072 * object allows the EcmaScript escaping functionality to be used 073 * as the foundation for a custom translator. 074 * 075 * @since 3.0 076 */ 077 public static final CharSequenceTranslator ESCAPE_ECMASCRIPT = 078 new AggregateTranslator( 079 new LookupTranslator( 080 new String[][] { 081 {"'", "\\'"}, 082 {"\"", "\\\""}, 083 {"\\", "\\\\"}, 084 {"/", "\\/"} 085 }), 086 new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()), 087 JavaUnicodeEscaper.outsideOf(32, 0x7f) 088 ); 089 090 /** 091 * Translator object for escaping Json. 092 * 093 * While {@link #escapeJson(String)} is the expected method of use, this 094 * object allows the Json escaping functionality to be used 095 * as the foundation for a custom translator. 096 * 097 * @since 3.2 098 */ 099 public static final CharSequenceTranslator ESCAPE_JSON = 100 new AggregateTranslator( 101 new LookupTranslator( 102 new String[][] { 103 {"\"", "\\\""}, 104 {"\\", "\\\\"}, 105 {"/", "\\/"} 106 }), 107 new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()), 108 JavaUnicodeEscaper.outsideOf(32, 0x7f) 109 ); 110 111 /** 112 * Translator object for escaping XML. 113 * 114 * While {@link #escapeXml(String)} is the expected method of use, this 115 * object allows the XML escaping functionality to be used 116 * as the foundation for a custom translator. 117 * 118 * @since 3.0 119 * @deprecated use {@link #ESCAPE_XML10} or {@link #ESCAPE_XML11} instead. 120 */ 121 @Deprecated 122 public static final CharSequenceTranslator ESCAPE_XML = 123 new AggregateTranslator( 124 new LookupTranslator(EntityArrays.BASIC_ESCAPE()), 125 new LookupTranslator(EntityArrays.APOS_ESCAPE()) 126 ); 127 128 /** 129 * Translator object for escaping XML 1.0. 130 * 131 * While {@link #escapeXml10(String)} is the expected method of use, this 132 * object allows the XML escaping functionality to be used 133 * as the foundation for a custom translator. 134 * 135 * @since 3.3 136 */ 137 public static final CharSequenceTranslator ESCAPE_XML10 = 138 new AggregateTranslator( 139 new LookupTranslator(EntityArrays.BASIC_ESCAPE()), 140 new LookupTranslator(EntityArrays.APOS_ESCAPE()), 141 new LookupTranslator( 142 new String[][] { 143 { "\u0000", StringUtils.EMPTY }, 144 { "\u0001", StringUtils.EMPTY }, 145 { "\u0002", StringUtils.EMPTY }, 146 { "\u0003", StringUtils.EMPTY }, 147 { "\u0004", StringUtils.EMPTY }, 148 { "\u0005", StringUtils.EMPTY }, 149 { "\u0006", StringUtils.EMPTY }, 150 { "\u0007", StringUtils.EMPTY }, 151 { "\u0008", StringUtils.EMPTY }, 152 { "\u000b", StringUtils.EMPTY }, 153 { "\u000c", StringUtils.EMPTY }, 154 { "\u000e", StringUtils.EMPTY }, 155 { "\u000f", StringUtils.EMPTY }, 156 { "\u0010", StringUtils.EMPTY }, 157 { "\u0011", StringUtils.EMPTY }, 158 { "\u0012", StringUtils.EMPTY }, 159 { "\u0013", StringUtils.EMPTY }, 160 { "\u0014", StringUtils.EMPTY }, 161 { "\u0015", StringUtils.EMPTY }, 162 { "\u0016", StringUtils.EMPTY }, 163 { "\u0017", StringUtils.EMPTY }, 164 { "\u0018", StringUtils.EMPTY }, 165 { "\u0019", StringUtils.EMPTY }, 166 { "\u001a", StringUtils.EMPTY }, 167 { "\u001b", StringUtils.EMPTY }, 168 { "\u001c", StringUtils.EMPTY }, 169 { "\u001d", StringUtils.EMPTY }, 170 { "\u001e", StringUtils.EMPTY }, 171 { "\u001f", StringUtils.EMPTY }, 172 { "\ufffe", StringUtils.EMPTY }, 173 { "\uffff", StringUtils.EMPTY } 174 }), 175 NumericEntityEscaper.between(0x7f, 0x84), 176 NumericEntityEscaper.between(0x86, 0x9f), 177 new UnicodeUnpairedSurrogateRemover() 178 ); 179 180 /** 181 * Translator object for escaping XML 1.1. 182 * 183 * While {@link #escapeXml11(String)} is the expected method of use, this 184 * object allows the XML escaping functionality to be used 185 * as the foundation for a custom translator. 186 * 187 * @since 3.3 188 */ 189 public static final CharSequenceTranslator ESCAPE_XML11 = 190 new AggregateTranslator( 191 new LookupTranslator(EntityArrays.BASIC_ESCAPE()), 192 new LookupTranslator(EntityArrays.APOS_ESCAPE()), 193 new LookupTranslator( 194 new String[][] { 195 { "\u0000", StringUtils.EMPTY }, 196 { "\u000b", "" }, 197 { "\u000c", "" }, 198 { "\ufffe", StringUtils.EMPTY }, 199 { "\uffff", StringUtils.EMPTY } 200 }), 201 NumericEntityEscaper.between(0x1, 0x8), 202 NumericEntityEscaper.between(0xe, 0x1f), 203 NumericEntityEscaper.between(0x7f, 0x84), 204 NumericEntityEscaper.between(0x86, 0x9f), 205 new UnicodeUnpairedSurrogateRemover() 206 ); 207 208 /** 209 * Translator object for escaping HTML version 3.0. 210 * 211 * While {@link #escapeHtml3(String)} is the expected method of use, this 212 * object allows the HTML escaping functionality to be used 213 * as the foundation for a custom translator. 214 * 215 * @since 3.0 216 */ 217 public static final CharSequenceTranslator ESCAPE_HTML3 = 218 new AggregateTranslator( 219 new LookupTranslator(EntityArrays.BASIC_ESCAPE()), 220 new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE()) 221 ); 222 223 /** 224 * Translator object for escaping HTML version 4.0. 225 * 226 * While {@link #escapeHtml4(String)} is the expected method of use, this 227 * object allows the HTML escaping functionality to be used 228 * as the foundation for a custom translator. 229 * 230 * @since 3.0 231 */ 232 public static final CharSequenceTranslator ESCAPE_HTML4 = 233 new AggregateTranslator( 234 new LookupTranslator(EntityArrays.BASIC_ESCAPE()), 235 new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE()), 236 new LookupTranslator(EntityArrays.HTML40_EXTENDED_ESCAPE()) 237 ); 238 239 /** 240 * Translator object for escaping individual Comma Separated Values. 241 * 242 * While {@link #escapeCsv(String)} is the expected method of use, this 243 * object allows the CSV escaping functionality to be used 244 * as the foundation for a custom translator. 245 * 246 * @since 3.0 247 */ 248 public static final CharSequenceTranslator ESCAPE_CSV = new CsvEscaper(); 249 250 // TODO: Create a parent class - 'SinglePassTranslator' ? 251 // It would handle the index checking + length returning, 252 // and could also have an optimization check method. 253 static class CsvEscaper extends CharSequenceTranslator { 254 255 private static final char CSV_DELIMITER = ','; 256 private static final char CSV_QUOTE = '"'; 257 private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE); 258 private static final char[] CSV_SEARCH_CHARS = 259 new char[] {CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF}; 260 261 @Override 262 public int translate(final CharSequence input, final int index, final Writer out) throws IOException { 263 264 if(index != 0) { 265 throw new IllegalStateException("CsvEscaper should never reach the [1] index"); 266 } 267 268 if (StringUtils.containsNone(input.toString(), CSV_SEARCH_CHARS)) { 269 out.write(input.toString()); 270 } else { 271 out.write(CSV_QUOTE); 272 out.write(StringUtils.replace(input.toString(), CSV_QUOTE_STR, CSV_QUOTE_STR + CSV_QUOTE_STR)); 273 out.write(CSV_QUOTE); 274 } 275 return Character.codePointCount(input, 0, input.length()); 276 } 277 } 278 279 /* UNESCAPE TRANSLATORS */ 280 281 /** 282 * Translator object for unescaping escaped Java. 283 * 284 * While {@link #unescapeJava(String)} is the expected method of use, this 285 * object allows the Java unescaping functionality to be used 286 * as the foundation for a custom translator. 287 * 288 * @since 3.0 289 */ 290 // TODO: throw "illegal character: \92" as an Exception if a \ on the end of the Java (as per the compiler)? 291 public static final CharSequenceTranslator UNESCAPE_JAVA = 292 new AggregateTranslator( 293 new OctalUnescaper(), // .between('\1', '\377'), 294 new UnicodeUnescaper(), 295 new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_UNESCAPE()), 296 new LookupTranslator( 297 new String[][] { 298 {"\\\\", "\\"}, 299 {"\\\"", "\""}, 300 {"\\'", "'"}, 301 {"\\", ""} 302 }) 303 ); 304 305 /** 306 * Translator object for unescaping escaped EcmaScript. 307 * 308 * While {@link #unescapeEcmaScript(String)} is the expected method of use, this 309 * object allows the EcmaScript unescaping functionality to be used 310 * as the foundation for a custom translator. 311 * 312 * @since 3.0 313 */ 314 public static final CharSequenceTranslator UNESCAPE_ECMASCRIPT = UNESCAPE_JAVA; 315 316 /** 317 * Translator object for unescaping escaped Json. 318 * 319 * While {@link #unescapeJson(String)} is the expected method of use, this 320 * object allows the Json unescaping functionality to be used 321 * as the foundation for a custom translator. 322 * 323 * @since 3.2 324 */ 325 public static final CharSequenceTranslator UNESCAPE_JSON = UNESCAPE_JAVA; 326 327 /** 328 * Translator object for unescaping escaped HTML 3.0. 329 * 330 * While {@link #unescapeHtml3(String)} is the expected method of use, this 331 * object allows the HTML unescaping functionality to be used 332 * as the foundation for a custom translator. 333 * 334 * @since 3.0 335 */ 336 public static final CharSequenceTranslator UNESCAPE_HTML3 = 337 new AggregateTranslator( 338 new LookupTranslator(EntityArrays.BASIC_UNESCAPE()), 339 new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()), 340 new NumericEntityUnescaper() 341 ); 342 343 /** 344 * Translator object for unescaping escaped HTML 4.0. 345 * 346 * While {@link #unescapeHtml4(String)} is the expected method of use, this 347 * object allows the HTML unescaping functionality to be used 348 * as the foundation for a custom translator. 349 * 350 * @since 3.0 351 */ 352 public static final CharSequenceTranslator UNESCAPE_HTML4 = 353 new AggregateTranslator( 354 new LookupTranslator(EntityArrays.BASIC_UNESCAPE()), 355 new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()), 356 new LookupTranslator(EntityArrays.HTML40_EXTENDED_UNESCAPE()), 357 new NumericEntityUnescaper() 358 ); 359 360 /** 361 * Translator object for unescaping escaped XML. 362 * 363 * While {@link #unescapeXml(String)} is the expected method of use, this 364 * object allows the XML unescaping functionality to be used 365 * as the foundation for a custom translator. 366 * 367 * @since 3.0 368 */ 369 public static final CharSequenceTranslator UNESCAPE_XML = 370 new AggregateTranslator( 371 new LookupTranslator(EntityArrays.BASIC_UNESCAPE()), 372 new LookupTranslator(EntityArrays.APOS_UNESCAPE()), 373 new NumericEntityUnescaper() 374 ); 375 376 /** 377 * Translator object for unescaping escaped Comma Separated Value entries. 378 * 379 * While {@link #unescapeCsv(String)} is the expected method of use, this 380 * object allows the CSV unescaping functionality to be used 381 * as the foundation for a custom translator. 382 * 383 * @since 3.0 384 */ 385 public static final CharSequenceTranslator UNESCAPE_CSV = new CsvUnescaper(); 386 387 static class CsvUnescaper extends CharSequenceTranslator { 388 389 private static final char CSV_DELIMITER = ','; 390 private static final char CSV_QUOTE = '"'; 391 private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE); 392 private static final char[] CSV_SEARCH_CHARS = 393 new char[] {CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF}; 394 395 @Override 396 public int translate(final CharSequence input, final int index, final Writer out) throws IOException { 397 398 if(index != 0) { 399 throw new IllegalStateException("CsvUnescaper should never reach the [1] index"); 400 } 401 402 if ( input.charAt(0) != CSV_QUOTE || input.charAt(input.length() - 1) != CSV_QUOTE ) { 403 out.write(input.toString()); 404 return Character.codePointCount(input, 0, input.length()); 405 } 406 407 // strip quotes 408 final String quoteless = input.subSequence(1, input.length() - 1).toString(); 409 410 if ( StringUtils.containsAny(quoteless, CSV_SEARCH_CHARS) ) { 411 // deal with escaped quotes; ie) "" 412 out.write(StringUtils.replace(quoteless, CSV_QUOTE_STR + CSV_QUOTE_STR, CSV_QUOTE_STR)); 413 } else { 414 out.write(input.toString()); 415 } 416 return Character.codePointCount(input, 0, input.length()); 417 } 418 } 419 420 /* Helper functions */ 421 422 /** 423 * <p>{@code StringEscapeUtils} instances should NOT be constructed in 424 * standard programming.</p> 425 * 426 * <p>Instead, the class should be used as:</p> 427 * <pre>StringEscapeUtils.escapeJava("foo");</pre> 428 * 429 * <p>This constructor is public to permit tools that require a JavaBean 430 * instance to operate.</p> 431 */ 432 public StringEscapeUtils() { 433 super(); 434 } 435 436 // Java and JavaScript 437 //-------------------------------------------------------------------------- 438 /** 439 * <p>Escapes the characters in a {@code String} using Java String rules.</p> 440 * 441 * <p>Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p> 442 * 443 * <p>So a tab becomes the characters {@code '\\'} and 444 * {@code 't'}.</p> 445 * 446 * <p>The only difference between Java strings and JavaScript strings 447 * is that in JavaScript, a single quote and forward-slash (/) are escaped.</p> 448 * 449 * <p>Example:</p> 450 * <pre> 451 * input string: He didn't say, "Stop!" 452 * output string: He didn't say, \"Stop!\" 453 * </pre> 454 * 455 * @param input String to escape values in, may be null 456 * @return String with escaped values, {@code null} if null string input 457 */ 458 public static final String escapeJava(final String input) { 459 return ESCAPE_JAVA.translate(input); 460 } 461 462 /** 463 * <p>Escapes the characters in a {@code String} using EcmaScript String rules.</p> 464 * <p>Escapes any values it finds into their EcmaScript String form. 465 * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p> 466 * 467 * <p>So a tab becomes the characters {@code '\\'} and 468 * {@code 't'}.</p> 469 * 470 * <p>The only difference between Java strings and EcmaScript strings 471 * is that in EcmaScript, a single quote and forward-slash (/) are escaped.</p> 472 * 473 * <p>Note that EcmaScript is best known by the JavaScript and ActionScript dialects. </p> 474 * 475 * <p>Example:</p> 476 * <pre> 477 * input string: He didn't say, "Stop!" 478 * output string: He didn\'t say, \"Stop!\" 479 * </pre> 480 * 481 * @param input String to escape values in, may be null 482 * @return String with escaped values, {@code null} if null string input 483 * 484 * @since 3.0 485 */ 486 public static final String escapeEcmaScript(final String input) { 487 return ESCAPE_ECMASCRIPT.translate(input); 488 } 489 490 /** 491 * <p>Escapes the characters in a {@code String} using Json String rules.</p> 492 * <p>Escapes any values it finds into their Json String form. 493 * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p> 494 * 495 * <p>So a tab becomes the characters {@code '\\'} and 496 * {@code 't'}.</p> 497 * 498 * <p>The only difference between Java strings and Json strings 499 * is that in Json, forward-slash (/) is escaped.</p> 500 * 501 * <p>See http://www.ietf.org/rfc/rfc4627.txt for further details. </p> 502 * 503 * <p>Example:</p> 504 * <pre> 505 * input string: He didn't say, "Stop!" 506 * output string: He didn't say, \"Stop!\" 507 * </pre> 508 * 509 * @param input String to escape values in, may be null 510 * @return String with escaped values, {@code null} if null string input 511 * 512 * @since 3.2 513 */ 514 public static final String escapeJson(final String input) { 515 return ESCAPE_JSON.translate(input); 516 } 517 518 /** 519 * <p>Unescapes any Java literals found in the {@code String}. 520 * For example, it will turn a sequence of {@code '\'} and 521 * {@code 'n'} into a newline character, unless the {@code '\'} 522 * is preceded by another {@code '\'}.</p> 523 * 524 * @param input the {@code String} to unescape, may be null 525 * @return a new unescaped {@code String}, {@code null} if null string input 526 */ 527 public static final String unescapeJava(final String input) { 528 return UNESCAPE_JAVA.translate(input); 529 } 530 531 /** 532 * <p>Unescapes any EcmaScript literals found in the {@code String}.</p> 533 * 534 * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'} 535 * into a newline character, unless the {@code '\'} is preceded by another 536 * {@code '\'}.</p> 537 * 538 * @see #unescapeJava(String) 539 * @param input the {@code String} to unescape, may be null 540 * @return A new unescaped {@code String}, {@code null} if null string input 541 * 542 * @since 3.0 543 */ 544 public static final String unescapeEcmaScript(final String input) { 545 return UNESCAPE_ECMASCRIPT.translate(input); 546 } 547 548 /** 549 * <p>Unescapes any Json literals found in the {@code String}.</p> 550 * 551 * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'} 552 * into a newline character, unless the {@code '\'} is preceded by another 553 * {@code '\'}.</p> 554 * 555 * @see #unescapeJava(String) 556 * @param input the {@code String} to unescape, may be null 557 * @return A new unescaped {@code String}, {@code null} if null string input 558 * 559 * @since 3.2 560 */ 561 public static final String unescapeJson(final String input) { 562 return UNESCAPE_JSON.translate(input); 563 } 564 565 // HTML and XML 566 //-------------------------------------------------------------------------- 567 /** 568 * <p>Escapes the characters in a {@code String} using HTML entities.</p> 569 * 570 * <p> 571 * For example: 572 * </p> 573 * <p><code>"bread" & "butter"</code></p> 574 * becomes: 575 * <p> 576 * <code>&quot;bread&quot; &amp; &quot;butter&quot;</code>. 577 * </p> 578 * 579 * <p>Supports all known HTML 4.0 entities, including funky accents. 580 * Note that the commonly used apostrophe escape character (&apos;) 581 * is not a legal entity and so is not supported). </p> 582 * 583 * @param input the {@code String} to escape, may be null 584 * @return a new escaped {@code String}, {@code null} if null string input 585 * 586 * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a> 587 * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a> 588 * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a> 589 * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a> 590 * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a> 591 * 592 * @since 3.0 593 */ 594 public static final String escapeHtml4(final String input) { 595 return ESCAPE_HTML4.translate(input); 596 } 597 598 /** 599 * <p>Escapes the characters in a {@code String} using HTML entities.</p> 600 * <p>Supports only the HTML 3.0 entities. </p> 601 * 602 * @param input the {@code String} to escape, may be null 603 * @return a new escaped {@code String}, {@code null} if null string input 604 * 605 * @since 3.0 606 */ 607 public static final String escapeHtml3(final String input) { 608 return ESCAPE_HTML3.translate(input); 609 } 610 611 //----------------------------------------------------------------------- 612 /** 613 * <p>Unescapes a string containing entity escapes to a string 614 * containing the actual Unicode characters corresponding to the 615 * escapes. Supports HTML 4.0 entities.</p> 616 * 617 * <p>For example, the string {@code "<Français>"} 618 * will become {@code "<Français>"}</p> 619 * 620 * <p>If an entity is unrecognized, it is left alone, and inserted 621 * verbatim into the result string. e.g. {@code ">&zzzz;x"} will 622 * become {@code ">&zzzz;x"}.</p> 623 * 624 * @param input the {@code String} to unescape, may be null 625 * @return a new unescaped {@code String}, {@code null} if null string input 626 * 627 * @since 3.0 628 */ 629 public static final String unescapeHtml4(final String input) { 630 return UNESCAPE_HTML4.translate(input); 631 } 632 633 /** 634 * <p>Unescapes a string containing entity escapes to a string 635 * containing the actual Unicode characters corresponding to the 636 * escapes. Supports only HTML 3.0 entities.</p> 637 * 638 * @param input the {@code String} to unescape, may be null 639 * @return a new unescaped {@code String}, {@code null} if null string input 640 * 641 * @since 3.0 642 */ 643 public static final String unescapeHtml3(final String input) { 644 return UNESCAPE_HTML3.translate(input); 645 } 646 647 //----------------------------------------------------------------------- 648 /** 649 * <p>Escapes the characters in a {@code String} using XML entities.</p> 650 * 651 * <p>For example: {@code "bread" & "butter"} => 652 * {@code "bread" & "butter"}. 653 * </p> 654 * 655 * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos). 656 * Does not support DTDs or external entities.</p> 657 * 658 * <p>Note that Unicode characters greater than 0x7f are as of 3.0, no longer 659 * escaped. If you still wish this functionality, you can achieve it 660 * via the following: 661 * {@code StringEscapeUtils.ESCAPE_XML.with( NumericEntityEscaper.between(0x7f, Integer.MAX_VALUE) );}</p> 662 * 663 * @param input the {@code String} to escape, may be null 664 * @return a new escaped {@code String}, {@code null} if null string input 665 * @see #unescapeXml(java.lang.String) 666 * @deprecated use {@link #escapeXml10(java.lang.String)} or {@link #escapeXml11(java.lang.String)} instead. 667 */ 668 @Deprecated 669 public static final String escapeXml(final String input) { 670 return ESCAPE_XML.translate(input); 671 } 672 673 /** 674 * <p>Escapes the characters in a {@code String} using XML entities.</p> 675 * 676 * <p>For example: {@code "bread" & "butter"} => 677 * {@code "bread" & "butter"}. 678 * </p> 679 * 680 * <p>Note that XML 1.0 is a text-only format: it cannot represent control 681 * characters or unpaired Unicode surrogate codepoints, even after escaping. 682 * {@code escapeXml10} will remove characters that do not fit in the 683 * following ranges:</p> 684 * 685 * <p>{@code #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p> 686 * 687 * <p>Though not strictly necessary, {@code escapeXml10} will escape 688 * characters in the following ranges:</p> 689 * 690 * <p>{@code [#x7F-#x84] | [#x86-#x9F]}</p> 691 * 692 * <p>The returned string can be inserted into a valid XML 1.0 or XML 1.1 693 * document. If you want to allow more non-text characters in an XML 1.1 694 * document, use {@link #escapeXml11(String)}.</p> 695 * 696 * @param input the {@code String} to escape, may be null 697 * @return a new escaped {@code String}, {@code null} if null string input 698 * @see #unescapeXml(java.lang.String) 699 * @since 3.3 700 */ 701 public static String escapeXml10(final String input) { 702 return ESCAPE_XML10.translate(input); 703 } 704 705 /** 706 * <p>Escapes the characters in a {@code String} using XML entities.</p> 707 * 708 * <p>For example: {@code "bread" & "butter"} => 709 * {@code "bread" & "butter"}. 710 * </p> 711 * 712 * <p>XML 1.1 can represent certain control characters, but it cannot represent 713 * the null byte or unpaired Unicode surrogate codepoints, even after escaping. 714 * {@code escapeXml11} will remove characters that do not fit in the following 715 * ranges:</p> 716 * 717 * <p>{@code [#x1-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p> 718 * 719 * <p>{@code escapeXml11} will escape characters in the following ranges:</p> 720 * 721 * <p>{@code [#x1-#x8] | [#xB-#xC] | [#xE-#x1F] | [#x7F-#x84] | [#x86-#x9F]}</p> 722 * 723 * <p>The returned string can be inserted into a valid XML 1.1 document. Do not 724 * use it for XML 1.0 documents.</p> 725 * 726 * @param input the {@code String} to escape, may be null 727 * @return a new escaped {@code String}, {@code null} if null string input 728 * @see #unescapeXml(java.lang.String) 729 * @since 3.3 730 */ 731 public static String escapeXml11(final String input) { 732 return ESCAPE_XML11.translate(input); 733 } 734 735 //----------------------------------------------------------------------- 736 /** 737 * <p>Unescapes a string containing XML entity escapes to a string 738 * containing the actual Unicode characters corresponding to the 739 * escapes.</p> 740 * 741 * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos). 742 * Does not support DTDs or external entities.</p> 743 * 744 * <p>Note that numerical \\u Unicode codes are unescaped to their respective 745 * Unicode characters. This may change in future releases. </p> 746 * 747 * @param input the {@code String} to unescape, may be null 748 * @return a new unescaped {@code String}, {@code null} if null string input 749 * @see #escapeXml(String) 750 * @see #escapeXml10(String) 751 * @see #escapeXml11(String) 752 */ 753 public static final String unescapeXml(final String input) { 754 return UNESCAPE_XML.translate(input); 755 } 756 757 //----------------------------------------------------------------------- 758 759 /** 760 * <p>Returns a {@code String} value for a CSV column enclosed in double quotes, 761 * if required.</p> 762 * 763 * <p>If the value contains a comma, newline or double quote, then the 764 * String value is returned enclosed in double quotes.</p> 765 * 766 * <p>Any double quote characters in the value are escaped with another double quote.</p> 767 * 768 * <p>If the value does not contain a comma, newline or double quote, then the 769 * String value is returned unchanged.</p> 770 * 771 * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and 772 * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>. 773 * 774 * @param input the input CSV column String, may be null 775 * @return the input String, enclosed in double quotes if the value contains a comma, 776 * newline or double quote, {@code null} if null string input 777 * @since 2.4 778 */ 779 public static final String escapeCsv(final String input) { 780 return ESCAPE_CSV.translate(input); 781 } 782 783 /** 784 * <p>Returns a {@code String} value for an unescaped CSV column. </p> 785 * 786 * <p>If the value is enclosed in double quotes, and contains a comma, newline 787 * or double quote, then quotes are removed. 788 * </p> 789 * 790 * <p>Any double quote escaped characters (a pair of double quotes) are unescaped 791 * to just one double quote. </p> 792 * 793 * <p>If the value is not enclosed in double quotes, or is and does not contain a 794 * comma, newline or double quote, then the String value is returned unchanged.</p> 795 * 796 * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and 797 * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>. 798 * 799 * @param input the input CSV column String, may be null 800 * @return the input String, with enclosing double quotes removed and embedded double 801 * quotes unescaped, {@code null} if null string input 802 * @since 2.4 803 */ 804 public static final String unescapeCsv(final String input) { 805 return UNESCAPE_CSV.translate(input); 806 } 807 808}