1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 package org.apache.commons.lang3; 18 19 import java.io.IOException; 20 import java.io.Writer; 21 22 import org.apache.commons.lang3.text.translate.AggregateTranslator; 23 import org.apache.commons.lang3.text.translate.CharSequenceTranslator; 24 import org.apache.commons.lang3.text.translate.EntityArrays; 25 import org.apache.commons.lang3.text.translate.JavaUnicodeEscaper; 26 import org.apache.commons.lang3.text.translate.LookupTranslator; 27 import org.apache.commons.lang3.text.translate.NumericEntityEscaper; 28 import org.apache.commons.lang3.text.translate.NumericEntityUnescaper; 29 import org.apache.commons.lang3.text.translate.OctalUnescaper; 30 import org.apache.commons.lang3.text.translate.UnicodeUnescaper; 31 import org.apache.commons.lang3.text.translate.UnicodeUnpairedSurrogateRemover; 32 33 /** 34 * Escapes and unescapes {@link String}s for 35 * Java, Java Script, HTML and XML. 36 * 37 * <p>#ThreadSafe#</p> 38 * @since 2.0 39 * @deprecated As of 3.6, use Apache Commons Text 40 * <a href="https://commons.apache.org/proper/commons-text/javadocs/api-release/org/apache/commons/text/StringEscapeUtils.html"> 41 * StringEscapeUtils</a> instead 42 */ 43 @Deprecated 44 public class StringEscapeUtils { 45 46 /* ESCAPE TRANSLATORS */ 47 48 /** 49 * Translator object for escaping Java. 50 * 51 * While {@link #escapeJava(String)} is the expected method of use, this 52 * object allows the Java escaping functionality to be used 53 * as the foundation for a custom translator. 54 * 55 * @since 3.0 56 */ 57 public static final CharSequenceTranslator ESCAPE_JAVA = 58 new LookupTranslator( 59 new String[][] { 60 {"\"", "\\\""}, 61 {"\\", "\\\\"}, 62 }).with( 63 new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()) 64 ).with( 65 JavaUnicodeEscaper.outsideOf(32, 0x7f) 66 ); 67 68 /** 69 * Translator object for escaping EcmaScript/JavaScript. 70 * 71 * While {@link #escapeEcmaScript(String)} is the expected method of use, this 72 * object allows the EcmaScript escaping functionality to be used 73 * as the foundation for a custom translator. 74 * 75 * @since 3.0 76 */ 77 public static final CharSequenceTranslator ESCAPE_ECMASCRIPT = 78 new AggregateTranslator( 79 new LookupTranslator( 80 new String[][] { 81 {"'", "\\'"}, 82 {"\"", "\\\""}, 83 {"\\", "\\\\"}, 84 {"/", "\\/"} 85 }), 86 new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()), 87 JavaUnicodeEscaper.outsideOf(32, 0x7f) 88 ); 89 90 /** 91 * Translator object for escaping Json. 92 * 93 * While {@link #escapeJson(String)} is the expected method of use, this 94 * object allows the Json escaping functionality to be used 95 * as the foundation for a custom translator. 96 * 97 * @since 3.2 98 */ 99 public static final CharSequenceTranslator ESCAPE_JSON = 100 new AggregateTranslator( 101 new LookupTranslator( 102 new String[][] { 103 {"\"", "\\\""}, 104 {"\\", "\\\\"}, 105 {"/", "\\/"} 106 }), 107 new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()), 108 JavaUnicodeEscaper.outsideOf(32, 0x7f) 109 ); 110 111 /** 112 * Translator object for escaping XML. 113 * 114 * While {@link #escapeXml(String)} is the expected method of use, this 115 * object allows the XML escaping functionality to be used 116 * as the foundation for a custom translator. 117 * 118 * @since 3.0 119 * @deprecated use {@link #ESCAPE_XML10} or {@link #ESCAPE_XML11} instead. 120 */ 121 @Deprecated 122 public static final CharSequenceTranslator ESCAPE_XML = 123 new AggregateTranslator( 124 new LookupTranslator(EntityArrays.BASIC_ESCAPE()), 125 new LookupTranslator(EntityArrays.APOS_ESCAPE()) 126 ); 127 128 /** 129 * Translator object for escaping XML 1.0. 130 * 131 * While {@link #escapeXml10(String)} is the expected method of use, this 132 * object allows the XML escaping functionality to be used 133 * as the foundation for a custom translator. 134 * 135 * @since 3.3 136 */ 137 public static final CharSequenceTranslator ESCAPE_XML10 = 138 new AggregateTranslator( 139 new LookupTranslator(EntityArrays.BASIC_ESCAPE()), 140 new LookupTranslator(EntityArrays.APOS_ESCAPE()), 141 new LookupTranslator( 142 new String[][] { 143 { "\u0000", StringUtils.EMPTY }, 144 { "\u0001", StringUtils.EMPTY }, 145 { "\u0002", StringUtils.EMPTY }, 146 { "\u0003", StringUtils.EMPTY }, 147 { "\u0004", StringUtils.EMPTY }, 148 { "\u0005", StringUtils.EMPTY }, 149 { "\u0006", StringUtils.EMPTY }, 150 { "\u0007", StringUtils.EMPTY }, 151 { "\u0008", StringUtils.EMPTY }, 152 { "\u000b", StringUtils.EMPTY }, 153 { "\u000c", StringUtils.EMPTY }, 154 { "\u000e", StringUtils.EMPTY }, 155 { "\u000f", StringUtils.EMPTY }, 156 { "\u0010", StringUtils.EMPTY }, 157 { "\u0011", StringUtils.EMPTY }, 158 { "\u0012", StringUtils.EMPTY }, 159 { "\u0013", StringUtils.EMPTY }, 160 { "\u0014", StringUtils.EMPTY }, 161 { "\u0015", StringUtils.EMPTY }, 162 { "\u0016", StringUtils.EMPTY }, 163 { "\u0017", StringUtils.EMPTY }, 164 { "\u0018", StringUtils.EMPTY }, 165 { "\u0019", StringUtils.EMPTY }, 166 { "\u001a", StringUtils.EMPTY }, 167 { "\u001b", StringUtils.EMPTY }, 168 { "\u001c", StringUtils.EMPTY }, 169 { "\u001d", StringUtils.EMPTY }, 170 { "\u001e", StringUtils.EMPTY }, 171 { "\u001f", StringUtils.EMPTY }, 172 { "\ufffe", StringUtils.EMPTY }, 173 { "\uffff", StringUtils.EMPTY } 174 }), 175 NumericEntityEscaper.between(0x7f, 0x84), 176 NumericEntityEscaper.between(0x86, 0x9f), 177 new UnicodeUnpairedSurrogateRemover() 178 ); 179 180 /** 181 * Translator object for escaping XML 1.1. 182 * 183 * While {@link #escapeXml11(String)} is the expected method of use, this 184 * object allows the XML escaping functionality to be used 185 * as the foundation for a custom translator. 186 * 187 * @since 3.3 188 */ 189 public static final CharSequenceTranslator ESCAPE_XML11 = 190 new AggregateTranslator( 191 new LookupTranslator(EntityArrays.BASIC_ESCAPE()), 192 new LookupTranslator(EntityArrays.APOS_ESCAPE()), 193 new LookupTranslator( 194 new String[][] { 195 { "\u0000", StringUtils.EMPTY }, 196 { "\u000b", "" }, 197 { "\u000c", "" }, 198 { "\ufffe", StringUtils.EMPTY }, 199 { "\uffff", StringUtils.EMPTY } 200 }), 201 NumericEntityEscaper.between(0x1, 0x8), 202 NumericEntityEscaper.between(0xe, 0x1f), 203 NumericEntityEscaper.between(0x7f, 0x84), 204 NumericEntityEscaper.between(0x86, 0x9f), 205 new UnicodeUnpairedSurrogateRemover() 206 ); 207 208 /** 209 * Translator object for escaping HTML version 3.0. 210 * 211 * While {@link #escapeHtml3(String)} is the expected method of use, this 212 * object allows the HTML escaping functionality to be used 213 * as the foundation for a custom translator. 214 * 215 * @since 3.0 216 */ 217 public static final CharSequenceTranslator ESCAPE_HTML3 = 218 new AggregateTranslator( 219 new LookupTranslator(EntityArrays.BASIC_ESCAPE()), 220 new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE()) 221 ); 222 223 /** 224 * Translator object for escaping HTML version 4.0. 225 * 226 * While {@link #escapeHtml4(String)} is the expected method of use, this 227 * object allows the HTML escaping functionality to be used 228 * as the foundation for a custom translator. 229 * 230 * @since 3.0 231 */ 232 public static final CharSequenceTranslator ESCAPE_HTML4 = 233 new AggregateTranslator( 234 new LookupTranslator(EntityArrays.BASIC_ESCAPE()), 235 new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE()), 236 new LookupTranslator(EntityArrays.HTML40_EXTENDED_ESCAPE()) 237 ); 238 239 /** 240 * Translator object for escaping individual Comma Separated Values. 241 * 242 * While {@link #escapeCsv(String)} is the expected method of use, this 243 * object allows the CSV escaping functionality to be used 244 * as the foundation for a custom translator. 245 * 246 * @since 3.0 247 */ 248 public static final CharSequenceTranslator ESCAPE_CSV = new CsvEscaper(); 249 250 // TODO: Create a parent class - 'SinglePassTranslator' ? 251 // It would handle the index checking + length returning, 252 // and could also have an optimization check method. 253 static class CsvEscaper extends CharSequenceTranslator { 254 255 private static final char CSV_DELIMITER = ','; 256 private static final char CSV_QUOTE = '"'; 257 private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE); 258 private static final char[] CSV_SEARCH_CHARS = { CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF }; 259 260 @Override 261 public int translate(final CharSequence input, final int index, final Writer out) throws IOException { 262 263 if (index != 0) { 264 throw new IllegalStateException("CsvEscaper should never reach the [1] index"); 265 } 266 267 if (StringUtils.containsNone(input.toString(), CSV_SEARCH_CHARS)) { 268 out.write(input.toString()); 269 } else { 270 out.write(CSV_QUOTE); 271 out.write(StringUtils.replace(input.toString(), CSV_QUOTE_STR, CSV_QUOTE_STR + CSV_QUOTE_STR)); 272 out.write(CSV_QUOTE); 273 } 274 return Character.codePointCount(input, 0, input.length()); 275 } 276 } 277 278 /* UNESCAPE TRANSLATORS */ 279 280 /** 281 * Translator object for unescaping escaped Java. 282 * 283 * While {@link #unescapeJava(String)} is the expected method of use, this 284 * object allows the Java unescaping functionality to be used 285 * as the foundation for a custom translator. 286 * 287 * @since 3.0 288 */ 289 // TODO: throw "illegal character: \92" as an Exception if a \ on the end of the Java (as per the compiler)? 290 public static final CharSequenceTranslator UNESCAPE_JAVA = 291 new AggregateTranslator( 292 new OctalUnescaper(), // .between('\1', '\377'), 293 new UnicodeUnescaper(), 294 new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_UNESCAPE()), 295 new LookupTranslator( 296 new String[][] { 297 {"\\\\", "\\"}, 298 {"\\\"", "\""}, 299 {"\\'", "'"}, 300 {"\\", ""} 301 }) 302 ); 303 304 /** 305 * Translator object for unescaping escaped EcmaScript. 306 * 307 * While {@link #unescapeEcmaScript(String)} is the expected method of use, this 308 * object allows the EcmaScript unescaping functionality to be used 309 * as the foundation for a custom translator. 310 * 311 * @since 3.0 312 */ 313 public static final CharSequenceTranslator UNESCAPE_ECMASCRIPT = UNESCAPE_JAVA; 314 315 /** 316 * Translator object for unescaping escaped Json. 317 * 318 * While {@link #unescapeJson(String)} is the expected method of use, this 319 * object allows the Json unescaping functionality to be used 320 * as the foundation for a custom translator. 321 * 322 * @since 3.2 323 */ 324 public static final CharSequenceTranslator UNESCAPE_JSON = UNESCAPE_JAVA; 325 326 /** 327 * Translator object for unescaping escaped HTML 3.0. 328 * 329 * While {@link #unescapeHtml3(String)} is the expected method of use, this 330 * object allows the HTML unescaping functionality to be used 331 * as the foundation for a custom translator. 332 * 333 * @since 3.0 334 */ 335 public static final CharSequenceTranslator UNESCAPE_HTML3 = 336 new AggregateTranslator( 337 new LookupTranslator(EntityArrays.BASIC_UNESCAPE()), 338 new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()), 339 new NumericEntityUnescaper() 340 ); 341 342 /** 343 * Translator object for unescaping escaped HTML 4.0. 344 * 345 * While {@link #unescapeHtml4(String)} is the expected method of use, this 346 * object allows the HTML unescaping functionality to be used 347 * as the foundation for a custom translator. 348 * 349 * @since 3.0 350 */ 351 public static final CharSequenceTranslator UNESCAPE_HTML4 = 352 new AggregateTranslator( 353 new LookupTranslator(EntityArrays.BASIC_UNESCAPE()), 354 new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()), 355 new LookupTranslator(EntityArrays.HTML40_EXTENDED_UNESCAPE()), 356 new NumericEntityUnescaper() 357 ); 358 359 /** 360 * Translator object for unescaping escaped XML. 361 * 362 * While {@link #unescapeXml(String)} is the expected method of use, this 363 * object allows the XML unescaping functionality to be used 364 * as the foundation for a custom translator. 365 * 366 * @since 3.0 367 */ 368 public static final CharSequenceTranslator UNESCAPE_XML = 369 new AggregateTranslator( 370 new LookupTranslator(EntityArrays.BASIC_UNESCAPE()), 371 new LookupTranslator(EntityArrays.APOS_UNESCAPE()), 372 new NumericEntityUnescaper() 373 ); 374 375 /** 376 * Translator object for unescaping escaped Comma Separated Value entries. 377 * 378 * While {@link #unescapeCsv(String)} is the expected method of use, this 379 * object allows the CSV unescaping functionality to be used 380 * as the foundation for a custom translator. 381 * 382 * @since 3.0 383 */ 384 public static final CharSequenceTranslator UNESCAPE_CSV = new CsvUnescaper(); 385 386 static class CsvUnescaper extends CharSequenceTranslator { 387 388 private static final char CSV_DELIMITER = ','; 389 private static final char CSV_QUOTE = '"'; 390 private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE); 391 private static final char[] CSV_SEARCH_CHARS = {CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF}; 392 393 @Override 394 public int translate(final CharSequence input, final int index, final Writer out) throws IOException { 395 396 if (index != 0) { 397 throw new IllegalStateException("CsvUnescaper should never reach the [1] index"); 398 } 399 400 if ( input.charAt(0) != CSV_QUOTE || input.charAt(input.length() - 1) != CSV_QUOTE ) { 401 out.write(input.toString()); 402 return Character.codePointCount(input, 0, input.length()); 403 } 404 405 // strip quotes 406 final String quoteless = input.subSequence(1, input.length() - 1).toString(); 407 408 if ( StringUtils.containsAny(quoteless, CSV_SEARCH_CHARS) ) { 409 // deal with escaped quotes; ie) "" 410 out.write(StringUtils.replace(quoteless, CSV_QUOTE_STR + CSV_QUOTE_STR, CSV_QUOTE_STR)); 411 } else { 412 out.write(input.toString()); 413 } 414 return Character.codePointCount(input, 0, input.length()); 415 } 416 } 417 418 /* Helper functions */ 419 420 /** 421 * {@link StringEscapeUtils} instances should NOT be constructed in 422 * standard programming. 423 * 424 * <p>Instead, the class should be used as:</p> 425 * <pre>StringEscapeUtils.escapeJava("foo");</pre> 426 * 427 * <p>This constructor is public to permit tools that require a JavaBean 428 * instance to operate.</p> 429 */ 430 public StringEscapeUtils() { 431 } 432 433 /** 434 * Escapes the characters in a {@link String} using Java String rules. 435 * 436 * <p>Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p> 437 * 438 * <p>So a tab becomes the characters {@code '\\'} and 439 * {@code 't'}.</p> 440 * 441 * <p>The only difference between Java strings and JavaScript strings 442 * is that in JavaScript, a single quote and forward-slash (/) are escaped.</p> 443 * 444 * <p>Example:</p> 445 * <pre> 446 * input string: He didn't say, "Stop!" 447 * output string: He didn't say, \"Stop!\" 448 * </pre> 449 * 450 * @param input String to escape values in, may be null 451 * @return String with escaped values, {@code null} if null string input 452 */ 453 public static final String escapeJava(final String input) { 454 return ESCAPE_JAVA.translate(input); 455 } 456 457 /** 458 * Escapes the characters in a {@link String} using EcmaScript String rules. 459 * <p>Escapes any values it finds into their EcmaScript String form. 460 * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p> 461 * 462 * <p>So a tab becomes the characters {@code '\\'} and 463 * {@code 't'}.</p> 464 * 465 * <p>The only difference between Java strings and EcmaScript strings 466 * is that in EcmaScript, a single quote and forward-slash (/) are escaped.</p> 467 * 468 * <p>Note that EcmaScript is best known by the JavaScript and ActionScript dialects.</p> 469 * 470 * <p>Example:</p> 471 * <pre> 472 * input string: He didn't say, "Stop!" 473 * output string: He didn\'t say, \"Stop!\" 474 * </pre> 475 * 476 * @param input String to escape values in, may be null 477 * @return String with escaped values, {@code null} if null string input 478 * 479 * @since 3.0 480 */ 481 public static final String escapeEcmaScript(final String input) { 482 return ESCAPE_ECMASCRIPT.translate(input); 483 } 484 485 /** 486 * Escapes the characters in a {@link String} using Json String rules. 487 * <p>Escapes any values it finds into their Json String form. 488 * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p> 489 * 490 * <p>So a tab becomes the characters {@code '\\'} and 491 * {@code 't'}.</p> 492 * 493 * <p>The only difference between Java strings and Json strings 494 * is that in Json, forward-slash (/) is escaped.</p> 495 * 496 * <p>See https://www.ietf.org/rfc/rfc4627.txt for further details.</p> 497 * 498 * <p>Example:</p> 499 * <pre> 500 * input string: He didn't say, "Stop!" 501 * output string: He didn't say, \"Stop!\" 502 * </pre> 503 * 504 * @param input String to escape values in, may be null 505 * @return String with escaped values, {@code null} if null string input 506 * 507 * @since 3.2 508 */ 509 public static final String escapeJson(final String input) { 510 return ESCAPE_JSON.translate(input); 511 } 512 513 /** 514 * Unescapes any Java literals found in the {@link String}. 515 * For example, it will turn a sequence of {@code '\'} and 516 * {@code 'n'} into a newline character, unless the {@code '\'} 517 * is preceded by another {@code '\'}. 518 * 519 * @param input the {@link String} to unescape, may be null 520 * @return a new unescaped {@link String}, {@code null} if null string input 521 */ 522 public static final String unescapeJava(final String input) { 523 return UNESCAPE_JAVA.translate(input); 524 } 525 526 /** 527 * Unescapes any EcmaScript literals found in the {@link String}. 528 * 529 * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'} 530 * into a newline character, unless the {@code '\'} is preceded by another 531 * {@code '\'}.</p> 532 * 533 * @see #unescapeJava(String) 534 * @param input the {@link String} to unescape, may be null 535 * @return A new unescaped {@link String}, {@code null} if null string input 536 * 537 * @since 3.0 538 */ 539 public static final String unescapeEcmaScript(final String input) { 540 return UNESCAPE_ECMASCRIPT.translate(input); 541 } 542 543 /** 544 * Unescapes any Json literals found in the {@link String}. 545 * 546 * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'} 547 * into a newline character, unless the {@code '\'} is preceded by another 548 * {@code '\'}.</p> 549 * 550 * @see #unescapeJava(String) 551 * @param input the {@link String} to unescape, may be null 552 * @return A new unescaped {@link String}, {@code null} if null string input 553 * 554 * @since 3.2 555 */ 556 public static final String unescapeJson(final String input) { 557 return UNESCAPE_JSON.translate(input); 558 } 559 560 /** 561 * Escapes the characters in a {@link String} using HTML entities. 562 * 563 * <p> 564 * For example: 565 * </p> 566 * <p>{@code "bread" & "butter"}</p> 567 * becomes: 568 * <p> 569 * {@code &quot;bread&quot; &amp; &quot;butter&quot;}. 570 * </p> 571 * 572 * <p>Supports all known HTML 4.0 entities, including funky accents. 573 * Note that the commonly used apostrophe escape character (&apos;) 574 * is not a legal entity and so is not supported).</p> 575 * 576 * @param input the {@link String} to escape, may be null 577 * @return a new escaped {@link String}, {@code null} if null string input 578 * 579 * @see <a href="https://web.archive.org/web/20060225074150/https://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a> 580 * @see <a href="https://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a> 581 * @see <a href="https://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a> 582 * @see <a href="https://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a> 583 * @see <a href="https://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a> 584 * 585 * @since 3.0 586 */ 587 public static final String escapeHtml4(final String input) { 588 return ESCAPE_HTML4.translate(input); 589 } 590 591 /** 592 * Escapes the characters in a {@link String} using HTML entities. 593 * <p>Supports only the HTML 3.0 entities.</p> 594 * 595 * @param input the {@link String} to escape, may be null 596 * @return a new escaped {@link String}, {@code null} if null string input 597 * 598 * @since 3.0 599 */ 600 public static final String escapeHtml3(final String input) { 601 return ESCAPE_HTML3.translate(input); 602 } 603 604 /** 605 * Unescapes a string containing entity escapes to a string 606 * containing the actual Unicode characters corresponding to the 607 * escapes. Supports HTML 4.0 entities. 608 * 609 * <p>For example, the string {@code "<Français>"} 610 * will become {@code "<Français>"}</p> 611 * 612 * <p>If an entity is unrecognized, it is left alone, and inserted 613 * verbatim into the result string. e.g. {@code ">&zzzz;x"} will 614 * become {@code ">&zzzz;x"}.</p> 615 * 616 * @param input the {@link String} to unescape, may be null 617 * @return a new unescaped {@link String}, {@code null} if null string input 618 * 619 * @since 3.0 620 */ 621 public static final String unescapeHtml4(final String input) { 622 return UNESCAPE_HTML4.translate(input); 623 } 624 625 /** 626 * Unescapes a string containing entity escapes to a string 627 * containing the actual Unicode characters corresponding to the 628 * escapes. Supports only HTML 3.0 entities. 629 * 630 * @param input the {@link String} to unescape, may be null 631 * @return a new unescaped {@link String}, {@code null} if null string input 632 * 633 * @since 3.0 634 */ 635 public static final String unescapeHtml3(final String input) { 636 return UNESCAPE_HTML3.translate(input); 637 } 638 639 /** 640 * Escapes the characters in a {@link String} using XML entities. 641 * 642 * <p>For example: {@code "bread" & "butter"} => 643 * {@code "bread" & "butter"}. 644 * </p> 645 * 646 * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos). 647 * Does not support DTDs or external entities.</p> 648 * 649 * <p>Note that Unicode characters greater than 0x7f are as of 3.0, no longer 650 * escaped. If you still wish this functionality, you can achieve it 651 * via the following: 652 * {@code StringEscapeUtils.ESCAPE_XML.with( NumericEntityEscaper.between(0x7f, Integer.MAX_VALUE) );}</p> 653 * 654 * @param input the {@link String} to escape, may be null 655 * @return a new escaped {@link String}, {@code null} if null string input 656 * @see #unescapeXml(String) 657 * @deprecated use {@link #escapeXml10(java.lang.String)} or {@link #escapeXml11(java.lang.String)} instead. 658 */ 659 @Deprecated 660 public static final String escapeXml(final String input) { 661 return ESCAPE_XML.translate(input); 662 } 663 664 /** 665 * Escapes the characters in a {@link String} using XML entities. 666 * 667 * <p>For example: {@code "bread" & "butter"} => 668 * {@code "bread" & "butter"}. 669 * </p> 670 * 671 * <p>Note that XML 1.0 is a text-only format: it cannot represent control 672 * characters or unpaired Unicode surrogate code points, even after escaping. 673 * {@code escapeXml10} will remove characters that do not fit in the 674 * following ranges:</p> 675 * 676 * <p>{@code #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p> 677 * 678 * <p>Though not strictly necessary, {@code escapeXml10} will escape 679 * characters in the following ranges:</p> 680 * 681 * <p>{@code [#x7F-#x84] | [#x86-#x9F]}</p> 682 * 683 * <p>The returned string can be inserted into a valid XML 1.0 or XML 1.1 684 * document. If you want to allow more non-text characters in an XML 1.1 685 * document, use {@link #escapeXml11(String)}.</p> 686 * 687 * @param input the {@link String} to escape, may be null 688 * @return a new escaped {@link String}, {@code null} if null string input 689 * @see #unescapeXml(String) 690 * @since 3.3 691 */ 692 public static String escapeXml10(final String input) { 693 return ESCAPE_XML10.translate(input); 694 } 695 696 /** 697 * Escapes the characters in a {@link String} using XML entities. 698 * 699 * <p>For example: {@code "bread" & "butter"} => 700 * {@code "bread" & "butter"}. 701 * </p> 702 * 703 * <p>XML 1.1 can represent certain control characters, but it cannot represent 704 * the null byte or unpaired Unicode surrogate code points, even after escaping. 705 * {@code escapeXml11} will remove characters that do not fit in the following 706 * ranges:</p> 707 * 708 * <p>{@code [#x1-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p> 709 * 710 * <p>{@code escapeXml11} will escape characters in the following ranges:</p> 711 * 712 * <p>{@code [#x1-#x8] | [#xB-#xC] | [#xE-#x1F] | [#x7F-#x84] | [#x86-#x9F]}</p> 713 * 714 * <p>The returned string can be inserted into a valid XML 1.1 document. Do not 715 * use it for XML 1.0 documents.</p> 716 * 717 * @param input the {@link String} to escape, may be null 718 * @return a new escaped {@link String}, {@code null} if null string input 719 * @see #unescapeXml(String) 720 * @since 3.3 721 */ 722 public static String escapeXml11(final String input) { 723 return ESCAPE_XML11.translate(input); 724 } 725 726 /** 727 * Unescapes a string containing XML entity escapes to a string 728 * containing the actual Unicode characters corresponding to the 729 * escapes. 730 * 731 * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos). 732 * Does not support DTDs or external entities.</p> 733 * 734 * <p>Note that numerical \\u Unicode codes are unescaped to their respective 735 * Unicode characters. This may change in future releases.</p> 736 * 737 * @param input the {@link String} to unescape, may be null 738 * @return a new unescaped {@link String}, {@code null} if null string input 739 * @see #escapeXml(String) 740 * @see #escapeXml10(String) 741 * @see #escapeXml11(String) 742 */ 743 public static final String unescapeXml(final String input) { 744 return UNESCAPE_XML.translate(input); 745 } 746 747 748 /** 749 * Returns a {@link String} value for a CSV column enclosed in double quotes, 750 * if required. 751 * 752 * <p>If the value contains a comma, newline or double quote, then the 753 * String value is returned enclosed in double quotes.</p> 754 * 755 * <p>Any double quote characters in the value are escaped with another double quote.</p> 756 * 757 * <p>If the value does not contain a comma, newline or double quote, then the 758 * String value is returned unchanged.</p> 759 * 760 * see <a href="https://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and 761 * <a href="https://datatracker.ietf.org/doc/html/rfc4180">RFC 4180</a>. 762 * 763 * @param input the input CSV column String, may be null 764 * @return the input String, enclosed in double quotes if the value contains a comma, 765 * newline or double quote, {@code null} if null string input 766 * @since 2.4 767 */ 768 public static final String escapeCsv(final String input) { 769 return ESCAPE_CSV.translate(input); 770 } 771 772 /** 773 * Returns a {@link String} value for an unescaped CSV column. 774 * 775 * <p>If the value is enclosed in double quotes, and contains a comma, newline 776 * or double quote, then quotes are removed. 777 * </p> 778 * 779 * <p>Any double quote escaped characters (a pair of double quotes) are unescaped 780 * to just one double quote.</p> 781 * 782 * <p>If the value is not enclosed in double quotes, or is and does not contain a 783 * comma, newline or double quote, then the String value is returned unchanged.</p> 784 * 785 * see <a href="https://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and 786 * <a href="https://datatracker.ietf.org/doc/html/rfc4180">RFC 4180</a>. 787 * 788 * @param input the input CSV column String, may be null 789 * @return the input String, with enclosing double quotes removed and embedded double 790 * quotes unescaped, {@code null} if null string input 791 * @since 2.4 792 */ 793 public static final String unescapeCsv(final String input) { 794 return UNESCAPE_CSV.translate(input); 795 } 796 797 }