001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * https://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.lang3; 018 019import java.io.IOException; 020import java.io.Writer; 021 022import org.apache.commons.lang3.text.translate.AggregateTranslator; 023import org.apache.commons.lang3.text.translate.CharSequenceTranslator; 024import org.apache.commons.lang3.text.translate.EntityArrays; 025import org.apache.commons.lang3.text.translate.JavaUnicodeEscaper; 026import org.apache.commons.lang3.text.translate.LookupTranslator; 027import org.apache.commons.lang3.text.translate.NumericEntityEscaper; 028import org.apache.commons.lang3.text.translate.NumericEntityUnescaper; 029import org.apache.commons.lang3.text.translate.OctalUnescaper; 030import org.apache.commons.lang3.text.translate.UnicodeUnescaper; 031import org.apache.commons.lang3.text.translate.UnicodeUnpairedSurrogateRemover; 032 033/** 034 * Escapes and unescapes {@link String}s for 035 * Java, Java Script, HTML and XML. 036 * 037 * <p>#ThreadSafe#</p> 038 * @since 2.0 039 * @deprecated As of 3.6, use Apache Commons Text 040 * <a href="https://commons.apache.org/proper/commons-text/javadocs/api-release/org/apache/commons/text/StringEscapeUtils.html"> 041 * StringEscapeUtils</a> instead 042 */ 043@Deprecated 044public class StringEscapeUtils { 045 046 /* ESCAPE TRANSLATORS */ 047 048 private static final class CsvEscaper extends CharSequenceTranslator { 049 050 private static final char CSV_DELIMITER = ','; 051 private static final char CSV_QUOTE = '"'; 052 private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE); 053 private static final char[] CSV_SEARCH_CHARS = { CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF }; 054 055 @Override 056 public int translate(final CharSequence input, final int index, final Writer out) throws IOException { 057 if (index != 0) { 058 throw new IllegalStateException("CsvEscaper should never reach the [1] index"); 059 } 060 if (StringUtils.containsNone(input.toString(), CSV_SEARCH_CHARS)) { 061 out.write(input.toString()); 062 } else { 063 out.write(CSV_QUOTE); 064 out.write(Strings.CS.replace(input.toString(), CSV_QUOTE_STR, CSV_QUOTE_STR + CSV_QUOTE_STR)); 065 out.write(CSV_QUOTE); 066 } 067 return Character.codePointCount(input, 0, input.length()); 068 } 069 } 070 071 private static final class CsvUnescaper extends CharSequenceTranslator { 072 073 private static final char CSV_DELIMITER = ','; 074 private static final char CSV_QUOTE = '"'; 075 private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE); 076 private static final char[] CSV_SEARCH_CHARS = {CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF}; 077 078 @Override 079 public int translate(final CharSequence input, final int index, final Writer out) throws IOException { 080 if (index != 0) { 081 throw new IllegalStateException("CsvUnescaper should never reach the [1] index"); 082 } 083 if (input.charAt(0) != CSV_QUOTE || input.charAt(input.length() - 1) != CSV_QUOTE) { 084 out.write(input.toString()); 085 return Character.codePointCount(input, 0, input.length()); 086 } 087 // strip quotes 088 final String quoteless = input.subSequence(1, input.length() - 1).toString(); 089 if (StringUtils.containsAny(quoteless, CSV_SEARCH_CHARS)) { 090 // deal with escaped quotes; ie) "" 091 out.write(Strings.CS.replace(quoteless, CSV_QUOTE_STR + CSV_QUOTE_STR, CSV_QUOTE_STR)); 092 } else { 093 out.write(input.toString()); 094 } 095 return Character.codePointCount(input, 0, input.length()); 096 } 097 } 098 099 /** 100 * Translator object for escaping Java. 101 * 102 * While {@link #escapeJava(String)} is the expected method of use, this 103 * object allows the Java escaping functionality to be used 104 * as the foundation for a custom translator. 105 * 106 * @since 3.0 107 */ 108 public static final CharSequenceTranslator ESCAPE_JAVA = 109 new LookupTranslator( 110 new String[][] { 111 {"\"", "\\\""}, 112 {"\\", "\\\\"}, 113 }).with( 114 new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()) 115 ).with( 116 JavaUnicodeEscaper.outsideOf(32, 0x7f) 117 ); 118 119 /** 120 * Translator object for escaping EcmaScript/JavaScript. 121 * 122 * While {@link #escapeEcmaScript(String)} is the expected method of use, this 123 * object allows the EcmaScript escaping functionality to be used 124 * as the foundation for a custom translator. 125 * 126 * @since 3.0 127 */ 128 public static final CharSequenceTranslator ESCAPE_ECMASCRIPT = 129 new AggregateTranslator( 130 new LookupTranslator( 131 new String[][] { 132 {"'", "\\'"}, 133 {"\"", "\\\""}, 134 {"\\", "\\\\"}, 135 {"/", "\\/"} 136 }), 137 new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()), 138 JavaUnicodeEscaper.outsideOf(32, 0x7f) 139 ); 140 141 /** 142 * Translator object for escaping Json. 143 * 144 * While {@link #escapeJson(String)} is the expected method of use, this 145 * object allows the Json escaping functionality to be used 146 * as the foundation for a custom translator. 147 * 148 * @since 3.2 149 */ 150 public static final CharSequenceTranslator ESCAPE_JSON = 151 new AggregateTranslator( 152 new LookupTranslator( 153 new String[][] { 154 {"\"", "\\\""}, 155 {"\\", "\\\\"}, 156 {"/", "\\/"} 157 }), 158 new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()), 159 JavaUnicodeEscaper.outsideOf(32, 0x7f) 160 ); 161 162 /** 163 * Translator object for escaping XML. 164 * 165 * While {@link #escapeXml(String)} is the expected method of use, this 166 * object allows the XML escaping functionality to be used 167 * as the foundation for a custom translator. 168 * 169 * @since 3.0 170 * @deprecated use {@link #ESCAPE_XML10} or {@link #ESCAPE_XML11} instead. 171 */ 172 @Deprecated 173 public static final CharSequenceTranslator ESCAPE_XML = 174 new AggregateTranslator( 175 new LookupTranslator(EntityArrays.BASIC_ESCAPE()), 176 new LookupTranslator(EntityArrays.APOS_ESCAPE()) 177 ); 178 179 /** 180 * Translator object for escaping XML 1.0. 181 * 182 * While {@link #escapeXml10(String)} is the expected method of use, this 183 * object allows the XML escaping functionality to be used 184 * as the foundation for a custom translator. 185 * 186 * @since 3.3 187 */ 188 public static final CharSequenceTranslator ESCAPE_XML10 = 189 new AggregateTranslator( 190 new LookupTranslator(EntityArrays.BASIC_ESCAPE()), 191 new LookupTranslator(EntityArrays.APOS_ESCAPE()), 192 new LookupTranslator( 193 new String[][] { 194 { "\u0000", StringUtils.EMPTY }, 195 { "\u0001", StringUtils.EMPTY }, 196 { "\u0002", StringUtils.EMPTY }, 197 { "\u0003", StringUtils.EMPTY }, 198 { "\u0004", StringUtils.EMPTY }, 199 { "\u0005", StringUtils.EMPTY }, 200 { "\u0006", StringUtils.EMPTY }, 201 { "\u0007", StringUtils.EMPTY }, 202 { "\u0008", StringUtils.EMPTY }, 203 { "\u000b", StringUtils.EMPTY }, 204 { "\u000c", StringUtils.EMPTY }, 205 { "\u000e", StringUtils.EMPTY }, 206 { "\u000f", StringUtils.EMPTY }, 207 { "\u0010", StringUtils.EMPTY }, 208 { "\u0011", StringUtils.EMPTY }, 209 { "\u0012", StringUtils.EMPTY }, 210 { "\u0013", StringUtils.EMPTY }, 211 { "\u0014", StringUtils.EMPTY }, 212 { "\u0015", StringUtils.EMPTY }, 213 { "\u0016", StringUtils.EMPTY }, 214 { "\u0017", StringUtils.EMPTY }, 215 { "\u0018", StringUtils.EMPTY }, 216 { "\u0019", StringUtils.EMPTY }, 217 { "\u001a", StringUtils.EMPTY }, 218 { "\u001b", StringUtils.EMPTY }, 219 { "\u001c", StringUtils.EMPTY }, 220 { "\u001d", StringUtils.EMPTY }, 221 { "\u001e", StringUtils.EMPTY }, 222 { "\u001f", StringUtils.EMPTY }, 223 { "\ufffe", StringUtils.EMPTY }, 224 { "\uffff", StringUtils.EMPTY } 225 }), 226 NumericEntityEscaper.between(0x7f, 0x84), 227 NumericEntityEscaper.between(0x86, 0x9f), 228 new UnicodeUnpairedSurrogateRemover() 229 ); 230 231 /** 232 * Translator object for escaping XML 1.1. 233 * 234 * While {@link #escapeXml11(String)} is the expected method of use, this 235 * object allows the XML escaping functionality to be used 236 * as the foundation for a custom translator. 237 * 238 * @since 3.3 239 */ 240 public static final CharSequenceTranslator ESCAPE_XML11 = 241 new AggregateTranslator( 242 new LookupTranslator(EntityArrays.BASIC_ESCAPE()), 243 new LookupTranslator(EntityArrays.APOS_ESCAPE()), 244 new LookupTranslator( 245 new String[][] { 246 { "\u0000", StringUtils.EMPTY }, 247 { "\u000b", "" }, 248 { "\u000c", "" }, 249 { "\ufffe", StringUtils.EMPTY }, 250 { "\uffff", StringUtils.EMPTY } 251 }), 252 NumericEntityEscaper.between(0x1, 0x8), 253 NumericEntityEscaper.between(0xe, 0x1f), 254 NumericEntityEscaper.between(0x7f, 0x84), 255 NumericEntityEscaper.between(0x86, 0x9f), 256 new UnicodeUnpairedSurrogateRemover() 257 ); 258 259 /** 260 * Translator object for escaping HTML version 3.0. 261 * 262 * While {@link #escapeHtml3(String)} is the expected method of use, this 263 * object allows the HTML escaping functionality to be used 264 * as the foundation for a custom translator. 265 * 266 * @since 3.0 267 */ 268 public static final CharSequenceTranslator ESCAPE_HTML3 = 269 new AggregateTranslator( 270 new LookupTranslator(EntityArrays.BASIC_ESCAPE()), 271 new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE()) 272 ); 273 274 /** 275 * Translator object for escaping HTML version 4.0. 276 * 277 * While {@link #escapeHtml4(String)} is the expected method of use, this 278 * object allows the HTML escaping functionality to be used 279 * as the foundation for a custom translator. 280 * 281 * @since 3.0 282 */ 283 public static final CharSequenceTranslator ESCAPE_HTML4 = 284 new AggregateTranslator( 285 new LookupTranslator(EntityArrays.BASIC_ESCAPE()), 286 new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE()), 287 new LookupTranslator(EntityArrays.HTML40_EXTENDED_ESCAPE()) 288 ); 289 290 /* UNESCAPE TRANSLATORS */ 291 292 /** 293 * Translator object for escaping individual Comma Separated Values. 294 * 295 * While {@link #escapeCsv(String)} is the expected method of use, this 296 * object allows the CSV escaping functionality to be used 297 * as the foundation for a custom translator. 298 * 299 * @since 3.0 300 */ 301 public static final CharSequenceTranslator ESCAPE_CSV = new CsvEscaper(); 302 303 /** 304 * Translator object for unescaping escaped Java. 305 * 306 * While {@link #unescapeJava(String)} is the expected method of use, this 307 * object allows the Java unescaping functionality to be used 308 * as the foundation for a custom translator. 309 * 310 * @since 3.0 311 */ 312 // TODO: throw "illegal character: \92" as an Exception if a \ on the end of the Java (as per the compiler)? 313 public static final CharSequenceTranslator UNESCAPE_JAVA = 314 new AggregateTranslator( 315 new OctalUnescaper(), // .between('\1', '\377'), 316 new UnicodeUnescaper(), 317 new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_UNESCAPE()), 318 new LookupTranslator( 319 new String[][] { 320 {"\\\\", "\\"}, 321 {"\\\"", "\""}, 322 {"\\'", "'"}, 323 {"\\", ""} 324 }) 325 ); 326 327 /** 328 * Translator object for unescaping escaped EcmaScript. 329 * 330 * While {@link #unescapeEcmaScript(String)} is the expected method of use, this 331 * object allows the EcmaScript unescaping functionality to be used 332 * as the foundation for a custom translator. 333 * 334 * @since 3.0 335 */ 336 public static final CharSequenceTranslator UNESCAPE_ECMASCRIPT = UNESCAPE_JAVA; 337 338 /** 339 * Translator object for unescaping escaped Json. 340 * 341 * While {@link #unescapeJson(String)} is the expected method of use, this 342 * object allows the Json unescaping functionality to be used 343 * as the foundation for a custom translator. 344 * 345 * @since 3.2 346 */ 347 public static final CharSequenceTranslator UNESCAPE_JSON = UNESCAPE_JAVA; 348 349 /** 350 * Translator object for unescaping escaped HTML 3.0. 351 * 352 * While {@link #unescapeHtml3(String)} is the expected method of use, this 353 * object allows the HTML unescaping functionality to be used 354 * as the foundation for a custom translator. 355 * 356 * @since 3.0 357 */ 358 public static final CharSequenceTranslator UNESCAPE_HTML3 = 359 new AggregateTranslator( 360 new LookupTranslator(EntityArrays.BASIC_UNESCAPE()), 361 new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()), 362 new NumericEntityUnescaper() 363 ); 364 365 /** 366 * Translator object for unescaping escaped HTML 4.0. 367 * 368 * While {@link #unescapeHtml4(String)} is the expected method of use, this 369 * object allows the HTML unescaping functionality to be used 370 * as the foundation for a custom translator. 371 * 372 * @since 3.0 373 */ 374 public static final CharSequenceTranslator UNESCAPE_HTML4 = 375 new AggregateTranslator( 376 new LookupTranslator(EntityArrays.BASIC_UNESCAPE()), 377 new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()), 378 new LookupTranslator(EntityArrays.HTML40_EXTENDED_UNESCAPE()), 379 new NumericEntityUnescaper() 380 ); 381 382 /** 383 * Translator object for unescaping escaped XML. 384 * 385 * While {@link #unescapeXml(String)} is the expected method of use, this 386 * object allows the XML unescaping functionality to be used 387 * as the foundation for a custom translator. 388 * 389 * @since 3.0 390 */ 391 public static final CharSequenceTranslator UNESCAPE_XML = 392 new AggregateTranslator( 393 new LookupTranslator(EntityArrays.BASIC_UNESCAPE()), 394 new LookupTranslator(EntityArrays.APOS_UNESCAPE()), 395 new NumericEntityUnescaper() 396 ); 397 398 /** 399 * Translator object for unescaping escaped Comma Separated Value entries. 400 * 401 * While {@link #unescapeCsv(String)} is the expected method of use, this 402 * object allows the CSV unescaping functionality to be used 403 * as the foundation for a custom translator. 404 * 405 * @since 3.0 406 */ 407 public static final CharSequenceTranslator UNESCAPE_CSV = new CsvUnescaper(); 408 409 /* Helper functions */ 410 411 /** 412 * Returns a {@link String} value for a CSV column enclosed in double quotes, 413 * if required. 414 * 415 * <p>If the value contains a comma, newline or double quote, then the 416 * String value is returned enclosed in double quotes.</p> 417 * 418 * <p>Any double quote characters in the value are escaped with another double quote.</p> 419 * 420 * <p>If the value does not contain a comma, newline or double quote, then the 421 * String value is returned unchanged.</p> 422 * 423 * see <a href="https://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and 424 * <a href="https://datatracker.ietf.org/doc/html/rfc4180">RFC 4180</a>. 425 * 426 * @param input the input CSV column String, may be null 427 * @return the input String, enclosed in double quotes if the value contains a comma, 428 * newline or double quote, {@code null} if null string input 429 * @since 2.4 430 */ 431 public static final String escapeCsv(final String input) { 432 return ESCAPE_CSV.translate(input); 433 } 434 435 /** 436 * Escapes the characters in a {@link String} using EcmaScript String rules. 437 * <p>Escapes any values it finds into their EcmaScript String form. 438 * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p> 439 * 440 * <p>So a tab becomes the characters {@code '\\'} and 441 * {@code 't'}.</p> 442 * 443 * <p>The only difference between Java strings and EcmaScript strings 444 * is that in EcmaScript, a single quote and forward-slash (/) are escaped.</p> 445 * 446 * <p>Note that EcmaScript is best known by the JavaScript and ActionScript dialects.</p> 447 * 448 * <p>Example:</p> 449 * <pre> 450 * input string: He didn't say, "Stop!" 451 * output string: He didn\'t say, \"Stop!\" 452 * </pre> 453 * 454 * @param input String to escape values in, may be null 455 * @return String with escaped values, {@code null} if null string input 456 * @since 3.0 457 */ 458 public static final String escapeEcmaScript(final String input) { 459 return ESCAPE_ECMASCRIPT.translate(input); 460 } 461 462 /** 463 * Escapes the characters in a {@link String} using HTML entities. 464 * <p>Supports only the HTML 3.0 entities.</p> 465 * 466 * @param input the {@link String} to escape, may be null 467 * @return a new escaped {@link String}, {@code null} if null string input 468 * @since 3.0 469 */ 470 public static final String escapeHtml3(final String input) { 471 return ESCAPE_HTML3.translate(input); 472 } 473 474 /** 475 * Escapes the characters in a {@link String} using HTML entities. 476 * 477 * <p> 478 * For example: 479 * </p> 480 * <p>{@code "bread" & "butter"}</p> 481 * becomes: 482 * <p> 483 * {@code &quot;bread&quot; &amp; &quot;butter&quot;}. 484 * </p> 485 * 486 * <p>Supports all known HTML 4.0 entities, including funky accents. 487 * Note that the commonly used apostrophe escape character (&apos;) 488 * is not a legal entity and so is not supported).</p> 489 * 490 * @param input the {@link String} to escape, may be null 491 * @return a new escaped {@link String}, {@code null} if null string input 492 * @see <a href="https://web.archive.org/web/20060225074150/https://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a> 493 * @see <a href="https://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a> 494 * @see <a href="https://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a> 495 * @see <a href="https://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a> 496 * @see <a href="https://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a> 497 * @since 3.0 498 */ 499 public static final String escapeHtml4(final String input) { 500 return ESCAPE_HTML4.translate(input); 501 } 502 503 /** 504 * Escapes the characters in a {@link String} using Java String rules. 505 * 506 * <p>Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p> 507 * 508 * <p>So a tab becomes the characters {@code '\\'} and 509 * {@code 't'}.</p> 510 * 511 * <p>The only difference between Java strings and JavaScript strings 512 * is that in JavaScript, a single quote and forward-slash (/) are escaped.</p> 513 * 514 * <p>Example:</p> 515 * <pre> 516 * input string: He didn't say, "Stop!" 517 * output string: He didn't say, \"Stop!\" 518 * </pre> 519 * 520 * @param input String to escape values in, may be null 521 * @return String with escaped values, {@code null} if null string input 522 */ 523 public static final String escapeJava(final String input) { 524 return ESCAPE_JAVA.translate(input); 525 } 526 527 /** 528 * Escapes the characters in a {@link String} using Json String rules. 529 * <p>Escapes any values it finds into their Json String form. 530 * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p> 531 * 532 * <p>So a tab becomes the characters {@code '\\'} and 533 * {@code 't'}.</p> 534 * 535 * <p>The only difference between Java strings and Json strings 536 * is that in Json, forward-slash (/) is escaped.</p> 537 * 538 * <p>See https://www.ietf.org/rfc/rfc4627.txt for further details.</p> 539 * 540 * <p>Example:</p> 541 * <pre> 542 * input string: He didn't say, "Stop!" 543 * output string: He didn't say, \"Stop!\" 544 * </pre> 545 * 546 * @param input String to escape values in, may be null 547 * @return String with escaped values, {@code null} if null string input 548 * @since 3.2 549 */ 550 public static final String escapeJson(final String input) { 551 return ESCAPE_JSON.translate(input); 552 } 553 554 /** 555 * Escapes the characters in a {@link String} using XML entities. 556 * 557 * <p>For example: {@code "bread" & "butter"} => 558 * {@code "bread" & "butter"}. 559 * </p> 560 * 561 * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos). 562 * Does not support DTDs or external entities.</p> 563 * 564 * <p>Note that Unicode characters greater than 0x7f are as of 3.0, no longer 565 * escaped. If you still wish this functionality, you can achieve it 566 * via the following: 567 * {@code StringEscapeUtils.ESCAPE_XML.with( NumericEntityEscaper.between(0x7f, Integer.MAX_VALUE));}</p> 568 * 569 * @param input the {@link String} to escape, may be null 570 * @return a new escaped {@link String}, {@code null} if null string input 571 * @see #unescapeXml(String) 572 * @deprecated use {@link #escapeXml10(java.lang.String)} or {@link #escapeXml11(java.lang.String)} instead. 573 */ 574 @Deprecated 575 public static final String escapeXml(final String input) { 576 return ESCAPE_XML.translate(input); 577 } 578 579 /** 580 * Escapes the characters in a {@link String} using XML entities. 581 * <p> 582 * For example: 583 * </p> 584 * 585 * <pre>{@code 586 * "bread" & "butter" 587 * }</pre> 588 * <p> 589 * converts to: 590 * </p> 591 * 592 * <pre> 593 * {@code 594 * "bread" & "butter" 595 * } 596 * </pre> 597 * 598 * <p> 599 * Note that XML 1.0 is a text-only format: it cannot represent control characters or unpaired Unicode surrogate code points, even after escaping. The 600 * method {@code escapeXml10} will remove characters that do not fit in the following ranges: 601 * </p> 602 * 603 * <p> 604 * {@code #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]} 605 * </p> 606 * 607 * <p> 608 * Though not strictly necessary, {@code escapeXml10} will escape characters in the following ranges: 609 * </p> 610 * 611 * <p> 612 * {@code [#x7F-#x84] | [#x86-#x9F]} 613 * </p> 614 * 615 * <p> 616 * The returned string can be inserted into a valid XML 1.0 or XML 1.1 document. If you want to allow more non-text characters in an XML 1.1 document, use 617 * {@link #escapeXml11(String)}. 618 * </p> 619 * 620 * @param input the {@link String} to escape, may be null 621 * @return a new escaped {@link String}, {@code null} if null string input 622 * @see #unescapeXml(String) 623 * @since 3.3 624 */ 625 public static String escapeXml10(final String input) { 626 return ESCAPE_XML10.translate(input); 627 } 628 629 /** 630 * Escapes the characters in a {@link String} using XML entities. 631 * 632 * <p>For example: {@code "bread" & "butter"} => 633 * {@code "bread" & "butter"}. 634 * </p> 635 * 636 * <p>XML 1.1 can represent certain control characters, but it cannot represent 637 * the null byte or unpaired Unicode surrogate code points, even after escaping. 638 * {@code escapeXml11} will remove characters that do not fit in the following 639 * ranges:</p> 640 * 641 * <p>{@code [#x1-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p> 642 * 643 * <p>{@code escapeXml11} will escape characters in the following ranges:</p> 644 * 645 * <p>{@code [#x1-#x8] | [#xB-#xC] | [#xE-#x1F] | [#x7F-#x84] | [#x86-#x9F]}</p> 646 * 647 * <p>The returned string can be inserted into a valid XML 1.1 document. Do not 648 * use it for XML 1.0 documents.</p> 649 * 650 * @param input the {@link String} to escape, may be null 651 * @return a new escaped {@link String}, {@code null} if null string input 652 * @see #unescapeXml(String) 653 * @since 3.3 654 */ 655 public static String escapeXml11(final String input) { 656 return ESCAPE_XML11.translate(input); 657 } 658 659 /** 660 * Returns a {@link String} value for an unescaped CSV column. 661 * 662 * <p>If the value is enclosed in double quotes, and contains a comma, newline 663 * or double quote, then quotes are removed. 664 * </p> 665 * 666 * <p>Any double quote escaped characters (a pair of double quotes) are unescaped 667 * to just one double quote.</p> 668 * 669 * <p>If the value is not enclosed in double quotes, or is and does not contain a 670 * comma, newline or double quote, then the String value is returned unchanged.</p> 671 * 672 * see <a href="https://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and 673 * <a href="https://datatracker.ietf.org/doc/html/rfc4180">RFC 4180</a>. 674 * 675 * @param input the input CSV column String, may be null 676 * @return the input String, with enclosing double quotes removed and embedded double 677 * quotes unescaped, {@code null} if null string input 678 * @since 2.4 679 */ 680 public static final String unescapeCsv(final String input) { 681 return UNESCAPE_CSV.translate(input); 682 } 683 684 /** 685 * Unescapes any EcmaScript literals found in the {@link String}. 686 * 687 * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'} 688 * into a newline character, unless the {@code '\'} is preceded by another 689 * {@code '\'}.</p> 690 * 691 * @see #unescapeJava(String) 692 * @param input the {@link String} to unescape, may be null 693 * @return A new unescaped {@link String}, {@code null} if null string input 694 * @since 3.0 695 */ 696 public static final String unescapeEcmaScript(final String input) { 697 return UNESCAPE_ECMASCRIPT.translate(input); 698 } 699 700 /** 701 * Unescapes a string containing entity escapes to a string 702 * containing the actual Unicode characters corresponding to the 703 * escapes. Supports only HTML 3.0 entities. 704 * 705 * @param input the {@link String} to unescape, may be null 706 * @return a new unescaped {@link String}, {@code null} if null string input 707 * @since 3.0 708 */ 709 public static final String unescapeHtml3(final String input) { 710 return UNESCAPE_HTML3.translate(input); 711 } 712 713 /** 714 * Unescapes a string containing entity escapes to a string 715 * containing the actual Unicode characters corresponding to the 716 * escapes. Supports HTML 4.0 entities. 717 * 718 * <p>For example, the string {@code "<Français>"} 719 * will become {@code "<Français>"}</p> 720 * 721 * <p>If an entity is unrecognized, it is left alone, and inserted 722 * verbatim into the result string. e.g. {@code ">&zzzz;x"} will 723 * become {@code ">&zzzz;x"}.</p> 724 * 725 * @param input the {@link String} to unescape, may be null 726 * @return a new unescaped {@link String}, {@code null} if null string input 727 * @since 3.0 728 */ 729 public static final String unescapeHtml4(final String input) { 730 return UNESCAPE_HTML4.translate(input); 731 } 732 733 /** 734 * Unescapes any Java literals found in the {@link String}. 735 * For example, it will turn a sequence of {@code '\'} and 736 * {@code 'n'} into a newline character, unless the {@code '\'} 737 * is preceded by another {@code '\'}. 738 * 739 * @param input the {@link String} to unescape, may be null 740 * @return a new unescaped {@link String}, {@code null} if null string input 741 */ 742 public static final String unescapeJava(final String input) { 743 return UNESCAPE_JAVA.translate(input); 744 } 745 746 /** 747 * Unescapes any Json literals found in the {@link String}. 748 * 749 * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'} 750 * into a newline character, unless the {@code '\'} is preceded by another 751 * {@code '\'}.</p> 752 * 753 * @see #unescapeJava(String) 754 * @param input the {@link String} to unescape, may be null 755 * @return A new unescaped {@link String}, {@code null} if null string input 756 * @since 3.2 757 */ 758 public static final String unescapeJson(final String input) { 759 return UNESCAPE_JSON.translate(input); 760 } 761 762 /** 763 * Unescapes a string containing XML entity escapes to a string 764 * containing the actual Unicode characters corresponding to the 765 * escapes. 766 * 767 * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos). 768 * Does not support DTDs or external entities.</p> 769 * 770 * <p>Note that numerical \\u Unicode codes are unescaped to their respective 771 * Unicode characters. This may change in future releases.</p> 772 * 773 * @param input the {@link String} to unescape, may be null 774 * @return a new unescaped {@link String}, {@code null} if null string input 775 * @see #escapeXml(String) 776 * @see #escapeXml10(String) 777 * @see #escapeXml11(String) 778 */ 779 public static final String unescapeXml(final String input) { 780 return UNESCAPE_XML.translate(input); 781 } 782 783 /** 784 * {@link StringEscapeUtils} instances should NOT be constructed in 785 * standard programming. 786 * 787 * <p>Instead, the class should be used as:</p> 788 * <pre>StringEscapeUtils.escapeJava("foo");</pre> 789 * 790 * <p>This constructor is public to permit tools that require a JavaBean 791 * instance to operate.</p> 792 * 793 * @deprecated TODO Make private in 4.0. 794 */ 795 @Deprecated 796 public StringEscapeUtils() { 797 // empty 798 } 799 800}