/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.lang3;

import java.io.IOException;
import java.io.Writer;

import org.apache.commons.lang3.text.translate.AggregateTranslator;
import org.apache.commons.lang3.text.translate.CharSequenceTranslator;
import org.apache.commons.lang3.text.translate.EntityArrays;
import org.apache.commons.lang3.text.translate.JavaUnicodeEscaper;
import org.apache.commons.lang3.text.translate.LookupTranslator;
import org.apache.commons.lang3.text.translate.NumericEntityEscaper;
import org.apache.commons.lang3.text.translate.NumericEntityUnescaper;
import org.apache.commons.lang3.text.translate.OctalUnescaper;
import org.apache.commons.lang3.text.translate.UnicodeUnescaper;
import org.apache.commons.lang3.text.translate.UnicodeUnpairedSurrogateRemover;

/**
 * Escapes and unescapes {@link String}s for
 * Java, Java Script, HTML and XML.
 *
 * <p>#ThreadSafe#</p>
 *
 * @since 2.0
 * @deprecated As of 3.6, use Apache Commons Text
 * <a href="https://commons.apache.org/proper/commons-text/javadocs/api-release/org/apache/commons/text/StringEscapeUtils.html">
 * StringEscapeUtils</a> instead.
 */
@Deprecated
public class StringEscapeUtils {

    /* ESCAPE TRANSLATORS */

    /**
     * Translator that writes a whole CSV column value, enclosing it in double quotes
     * (and doubling any embedded double quote) when it contains a comma, a double quote,
     * a carriage return or a line feed.
     *
     * <p>The translator only accepts index {@code 0}: it always handles the complete input in one call.</p>
     */
    private static final class CsvEscaper extends CharSequenceTranslator {

        private static final char CSV_DELIMITER = ',';
        private static final char CSV_QUOTE = '"';
        private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE);
        private static final char[] CSV_SEARCH_CHARS = { CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF };

        /**
         * Writes the escaped form of the whole {@code input} to {@code out}.
         *
         * @param input the complete column value to escape
         * @param index the position in {@code input}; must be {@code 0}
         * @param out the writer receiving the escaped value
         * @return the number of code points in {@code input}
         * @throws IOException if writing to {@code out} fails
         * @throws IllegalStateException if {@code index} is not {@code 0}
         */
        @Override
        public int translate(final CharSequence input, final int index, final Writer out) throws IOException {
            if (index != 0) {
                throw new IllegalStateException("CsvEscaper should never reach the [1] index");
            }
            final String value = input.toString();
            if (StringUtils.containsNone(value, CSV_SEARCH_CHARS)) {
                // Nothing that needs protecting: emit the value as is.
                out.write(value);
            } else {
                out.write(CSV_QUOTE);
                // An embedded quote is escaped by doubling it.
                out.write(Strings.CS.replace(value, CSV_QUOTE_STR, CSV_QUOTE_STR + CSV_QUOTE_STR));
                out.write(CSV_QUOTE);
            }
            return Character.codePointCount(input, 0, input.length());
        }
    }

    /**
     * Translator that reverses {@link CsvEscaper}: a value enclosed in double quotes whose content
     * contains a comma, a double quote, a carriage return or a line feed has the enclosing quotes
     * removed and doubled quotes collapsed; any other value is written unchanged.
     *
     * <p>The translator only accepts index {@code 0}: it always handles the complete input in one call.</p>
     */
    private static final class CsvUnescaper extends CharSequenceTranslator {

        private static final char CSV_DELIMITER = ',';
        private static final char CSV_QUOTE = '"';
        private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE);
        private static final char[] CSV_SEARCH_CHARS = { CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF };

        /**
         * Writes the unescaped form of the whole {@code input} to {@code out}.
         *
         * @param input the complete column value to unescape
         * @param index the position in {@code input}; must be {@code 0}
         * @param out the writer receiving the unescaped value
         * @return the number of code points in {@code input}
         * @throws IOException if writing to {@code out} fails
         * @throws IllegalStateException if {@code index} is not {@code 0}
         */
        @Override
        public int translate(final CharSequence input, final int index, final Writer out) throws IOException {
            if (index != 0) {
                throw new IllegalStateException("CsvUnescaper should never reach the [1] index");
            }
            final int length = input.length();
            // A value shorter than two characters cannot be enclosed in a pair of quotes. Without the
            // length guard a lone quote would satisfy both charAt checks and subSequence(1, 0) would throw.
            if (length < 2 || input.charAt(0) != CSV_QUOTE || input.charAt(length - 1) != CSV_QUOTE) {
                out.write(input.toString());
                return Character.codePointCount(input, 0, length);
            }
            // strip quotes
            final String quoteless = input.subSequence(1, length - 1).toString();
            if (StringUtils.containsAny(quoteless, CSV_SEARCH_CHARS)) {
                // deal with escaped quotes; ie) ""
                out.write(Strings.CS.replace(quoteless, CSV_QUOTE_STR + CSV_QUOTE_STR, CSV_QUOTE_STR));
            } else {
                // The quotes were not required by the content, so the value is left untouched.
                out.write(input.toString());
            }
            return Character.codePointCount(input, 0, length);
        }
    }

    /**
     * Translator object for escaping Java.
     *
     * While {@link #escapeJava(String)} is the expected method of use, this
     * object allows the Java escaping functionality to be used
     * as the foundation for a custom translator.
     *
     * @since 3.0
     */
    public static final CharSequenceTranslator ESCAPE_JAVA =
          new LookupTranslator(
            new String[][] {
              {"\"", "\\\""},
              {"\\", "\\\\"},
          }).with(
            new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE())
          ).with(
            JavaUnicodeEscaper.outsideOf(32, 0x7f)
        );

    /**
     * Translator object for escaping EcmaScript/JavaScript.
     *
     * While {@link #escapeEcmaScript(String)} is the expected method of use, this
     * object allows the EcmaScript escaping functionality to be used
     * as the foundation for a custom translator.
     *
     * @since 3.0
     */
    public static final CharSequenceTranslator ESCAPE_ECMASCRIPT =
        new AggregateTranslator(
            new LookupTranslator(
                      new String[][] {
                            {"'", "\\'"},
                            {"\"", "\\\""},
                            {"\\", "\\\\"},
                            {"/", "\\/"}
                      }),
            new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()),
            JavaUnicodeEscaper.outsideOf(32, 0x7f)
        );

    /**
     * Translator object for escaping Json.
     *
     * While {@link #escapeJson(String)} is the expected method of use, this
     * object allows the Json escaping functionality to be used
     * as the foundation for a custom translator.
     *
     * @since 3.2
     */
    public static final CharSequenceTranslator ESCAPE_JSON =
        new AggregateTranslator(
            new LookupTranslator(
                      new String[][] {
                            {"\"", "\\\""},
                            {"\\", "\\\\"},
                            {"/", "\\/"}
                      }),
            new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()),
            JavaUnicodeEscaper.outsideOf(32, 0x7f)
        );

    /**
     * Translator object for escaping XML.
     *
     * While {@link #escapeXml(String)} is the expected method of use, this
     * object allows the XML escaping functionality to be used
     * as the foundation for a custom translator.
     *
     * @since 3.0
     * @deprecated Use {@link #ESCAPE_XML10} or {@link #ESCAPE_XML11} instead.
     */
    @Deprecated
    public static final CharSequenceTranslator ESCAPE_XML =
        new AggregateTranslator(
            new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
            new LookupTranslator(EntityArrays.APOS_ESCAPE())
        );

    /**
     * Translator object for escaping XML 1.0.
     *
     * While {@link #escapeXml10(String)} is the expected method of use, this
     * object allows the XML escaping functionality to be used
     * as the foundation for a custom translator.
     *
     * @since 3.3
     */
    public static final CharSequenceTranslator ESCAPE_XML10 =
        new AggregateTranslator(
            new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
            new LookupTranslator(EntityArrays.APOS_ESCAPE()),
            new LookupTranslator(
                    new String[][] {
                            { "\u0000", StringUtils.EMPTY },
                            { "\u0001", StringUtils.EMPTY },
                            { "\u0002", StringUtils.EMPTY },
                            { "\u0003", StringUtils.EMPTY },
                            { "\u0004", StringUtils.EMPTY },
                            { "\u0005", StringUtils.EMPTY },
                            { "\u0006", StringUtils.EMPTY },
                            { "\u0007", StringUtils.EMPTY },
                            { "\u0008", StringUtils.EMPTY },
                            { "\u000b", StringUtils.EMPTY },
                            { "\u000c", StringUtils.EMPTY },
                            { "\u000e", StringUtils.EMPTY },
                            { "\u000f", StringUtils.EMPTY },
                            { "\u0010", StringUtils.EMPTY },
                            { "\u0011", StringUtils.EMPTY },
                            { "\u0012", StringUtils.EMPTY },
                            { "\u0013", StringUtils.EMPTY },
                            { "\u0014", StringUtils.EMPTY },
                            { "\u0015", StringUtils.EMPTY },
                            { "\u0016", StringUtils.EMPTY },
                            { "\u0017", StringUtils.EMPTY },
                            { "\u0018", StringUtils.EMPTY },
                            { "\u0019", StringUtils.EMPTY },
                            { "\u001a", StringUtils.EMPTY },
                            { "\u001b", StringUtils.EMPTY },
                            { "\u001c", StringUtils.EMPTY },
                            { "\u001d", StringUtils.EMPTY },
                            { "\u001e", StringUtils.EMPTY },
                            { "\u001f", StringUtils.EMPTY },
                            { "\ufffe", StringUtils.EMPTY },
                            { "\uffff", StringUtils.EMPTY }
                    }),
            NumericEntityEscaper.between(0x7f, 0x84),
            NumericEntityEscaper.between(0x86, 0x9f),
            new UnicodeUnpairedSurrogateRemover()
        );

    /**
     * Translator object for escaping XML 1.1.
     *
     * While {@link #escapeXml11(String)} is the expected method of use, this
     * object allows the XML escaping functionality to be used
     * as the foundation for a custom translator.
     *
     * @since 3.3
     */
    public static final CharSequenceTranslator ESCAPE_XML11 =
        new AggregateTranslator(
            new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
            new LookupTranslator(EntityArrays.APOS_ESCAPE()),
            new LookupTranslator(
                    new String[][] {
                            { "\u0000", StringUtils.EMPTY },
                            // Vertical tab and form feed are escaped as numeric entities, not removed:
                            // escapeXml11 documents [#xB-#xC] as an escaped range.
                            { "\u000b", "&#11;" },
                            { "\u000c", "&#12;" },
                            { "\ufffe", StringUtils.EMPTY },
                            { "\uffff", StringUtils.EMPTY }
                    }),
            NumericEntityEscaper.between(0x1, 0x8),
            NumericEntityEscaper.between(0xe, 0x1f),
            NumericEntityEscaper.between(0x7f, 0x84),
            NumericEntityEscaper.between(0x86, 0x9f),
            new UnicodeUnpairedSurrogateRemover()
        );

    /**
     * Translator object for escaping HTML version 3.0.
     *
     * While {@link #escapeHtml3(String)} is the expected method of use, this
     * object allows the HTML escaping functionality to be used
     * as the foundation for a custom translator.
     *
     * @since 3.0
     */
    public static final CharSequenceTranslator ESCAPE_HTML3 =
        new AggregateTranslator(
            new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
            new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE())
        );

    /**
     * Translator object for escaping HTML version 4.0.
     *
     * While {@link #escapeHtml4(String)} is the expected method of use, this
     * object allows the HTML escaping functionality to be used
     * as the foundation for a custom translator.
     *
     * @since 3.0
     */
    public static final CharSequenceTranslator ESCAPE_HTML4 =
        new AggregateTranslator(
            new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
            new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE()),
            new LookupTranslator(EntityArrays.HTML40_EXTENDED_ESCAPE())
        );

    /**
     * Translator object for escaping individual Comma Separated Values.
     *
     * While {@link #escapeCsv(String)} is the expected method of use, this
     * object allows the CSV escaping functionality to be used
     * as the foundation for a custom translator.
     *
     * @since 3.0
     */
    public static final CharSequenceTranslator ESCAPE_CSV = new CsvEscaper();

    /* UNESCAPE TRANSLATORS */

    /**
     * Translator object for unescaping escaped Java.
     *
     * While {@link #unescapeJava(String)} is the expected method of use, this
     * object allows the Java unescaping functionality to be used
     * as the foundation for a custom translator.
     *
     * @since 3.0
     */
    // TODO: throw "illegal character: \92" as an Exception if a \ on the end of the Java (as per the compiler)?
    public static final CharSequenceTranslator UNESCAPE_JAVA =
        new AggregateTranslator(
            new OctalUnescaper(),     // .between('\1', '\377'),
            new UnicodeUnescaper(),
            new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_UNESCAPE()),
            new LookupTranslator(
                      new String[][] {
                            {"\\\\", "\\"},
                            {"\\\"", "\""},
                            {"\\'", "'"},
                            {"\\", ""}
                      })
        );

    /**
     * Translator object for unescaping escaped EcmaScript.
     *
     * While {@link #unescapeEcmaScript(String)} is the expected method of use, this
     * object allows the EcmaScript unescaping functionality to be used
     * as the foundation for a custom translator.
     *
     * @since 3.0
     */
    public static final CharSequenceTranslator UNESCAPE_ECMASCRIPT = UNESCAPE_JAVA;

    /**
     * Translator object for unescaping escaped Json.
     *
     * While {@link #unescapeJson(String)} is the expected method of use, this
     * object allows the Json unescaping functionality to be used
     * as the foundation for a custom translator.
     *
     * @since 3.2
     */
    public static final CharSequenceTranslator UNESCAPE_JSON = UNESCAPE_JAVA;

    /**
     * Translator object for unescaping escaped HTML 3.0.
     *
     * While {@link #unescapeHtml3(String)} is the expected method of use, this
     * object allows the HTML unescaping functionality to be used
     * as the foundation for a custom translator.
     *
     * @since 3.0
     */
    public static final CharSequenceTranslator UNESCAPE_HTML3 =
        new AggregateTranslator(
            new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
            new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()),
            new NumericEntityUnescaper()
        );

    /**
     * Translator object for unescaping escaped HTML 4.0.
     *
     * While {@link #unescapeHtml4(String)} is the expected method of use, this
     * object allows the HTML unescaping functionality to be used
     * as the foundation for a custom translator.
     *
     * @since 3.0
     */
    public static final CharSequenceTranslator UNESCAPE_HTML4 =
        new AggregateTranslator(
            new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
            new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()),
            new LookupTranslator(EntityArrays.HTML40_EXTENDED_UNESCAPE()),
            new NumericEntityUnescaper()
        );

    /**
     * Translator object for unescaping escaped XML.
     *
     * While {@link #unescapeXml(String)} is the expected method of use, this
     * object allows the XML unescaping functionality to be used
     * as the foundation for a custom translator.
     *
     * @since 3.0
     */
    public static final CharSequenceTranslator UNESCAPE_XML =
        new AggregateTranslator(
            new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
            new LookupTranslator(EntityArrays.APOS_UNESCAPE()),
            new NumericEntityUnescaper()
        );

    /**
     * Translator object for unescaping escaped Comma Separated Value entries.
     *
     * While {@link #unescapeCsv(String)} is the expected method of use, this
     * object allows the CSV unescaping functionality to be used
     * as the foundation for a custom translator.
     *
     * @since 3.0
     */
    public static final CharSequenceTranslator UNESCAPE_CSV = new CsvUnescaper();

    /* Helper functions */

    /**
     * Returns a {@link String} value for a CSV column enclosed in double quotes,
     * if required.
     *
     * <p>If the value contains a comma, newline or double quote, then the
     *    String value is returned enclosed in double quotes.</p>
     *
     * <p>Any double quote characters in the value are escaped with another double quote.</p>
     *
     * <p>If the value does not contain a comma, newline or double quote, then the
     *    String value is returned unchanged.</p>
     *
     * see <a href="https://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
     * <a href="https://datatracker.ietf.org/doc/html/rfc4180">RFC 4180</a>.
     *
     * @param input the input CSV column String, may be null
     * @return the input String, enclosed in double quotes if the value contains a comma,
     * newline or double quote, {@code null} if null string input
     * @since 2.4
     */
    public static final String escapeCsv(final String input) {
        return ESCAPE_CSV.translate(input);
    }

    /**
     * Escapes the characters in a {@link String} using EcmaScript String rules.
     * <p>Escapes any values it finds into their EcmaScript String form.
     * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
     *
     * <p>So a tab becomes the characters {@code '\\'} and
     * {@code 't'}.</p>
     *
     * <p>The only difference between Java strings and EcmaScript strings
     * is that in EcmaScript, a single quote and forward-slash (/) are escaped.</p>
     *
     * <p>Note that EcmaScript is best known by the JavaScript and ActionScript dialects.</p>
     *
     * <p>Example:</p>
     * <pre>
     * input string: He didn't say, "Stop!"
     * output string: He didn\'t say, \"Stop!\"
     * </pre>
     *
     * @param input  String to escape values in, may be null
     * @return String with escaped values, {@code null} if null string input
     * @since 3.0
     */
    public static final String escapeEcmaScript(final String input) {
        return ESCAPE_ECMASCRIPT.translate(input);
    }

    /**
     * Escapes the characters in a {@link String} using HTML entities.
     * <p>Supports only the HTML 3.0 entities.</p>
     *
     * @param input  the {@link String} to escape, may be null
     * @return a new escaped {@link String}, {@code null} if null string input
     * @since 3.0
     */
    public static final String escapeHtml3(final String input) {
        return ESCAPE_HTML3.translate(input);
    }

    /**
     * Escapes the characters in a {@link String} using HTML entities.
     *
     * <p>
     * For example:
     * </p>
     * <p>{@code "bread" & "butter"}</p>
     * becomes:
     * <p>
     * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
     * </p>
     *
     * <p>Supports all known HTML 4.0 entities, including funky accents.
     * Note that the commonly used apostrophe escape character (&amp;apos;)
     * is not a legal entity and so is not supported).</p>
     *
     * @param input  the {@link String} to escape, may be null
     * @return a new escaped {@link String}, {@code null} if null string input
     * @see <a href="https://web.archive.org/web/20060225074150/https://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a>
     * @see <a href="https://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a>
     * @see <a href="https://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a>
     * @see <a href="https://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a>
     * @see <a href="https://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a>
     * @since 3.0
     */
    public static final String escapeHtml4(final String input) {
        return ESCAPE_HTML4.translate(input);
    }

    /**
     * Escapes the characters in a {@link String} using Java String rules.
     *
     * <p>Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
     *
     * <p>So a tab becomes the characters {@code '\\'} and
     * {@code 't'}.</p>
     *
     * <p>The only difference between Java strings and JavaScript strings
     * is that in JavaScript, a single quote and forward-slash (/) are escaped.</p>
     *
     * <p>Example:</p>
     * <pre>
     * input string: He didn't say, "Stop!"
     * output string: He didn't say, \"Stop!\"
     * </pre>
     *
     * @param input  String to escape values in, may be null
     * @return String with escaped values, {@code null} if null string input
     */
    public static final String escapeJava(final String input) {
        return ESCAPE_JAVA.translate(input);
    }

    /**
     * Escapes the characters in a {@link String} using Json String rules.
     * <p>Escapes any values it finds into their Json String form.
     * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
     *
     * <p>So a tab becomes the characters {@code '\\'} and
     * {@code 't'}.</p>
     *
     * <p>The only difference between Java strings and Json strings
     * is that in Json, forward-slash (/) is escaped.</p>
     *
     * <p>See https://www.ietf.org/rfc/rfc4627.txt for further details.</p>
     *
     * <p>Example:</p>
     * <pre>
     * input string: He didn't say, "Stop!"
     * output string: He didn't say, \"Stop!\"
     * </pre>
     *
     * @param input  String to escape values in, may be null
     * @return String with escaped values, {@code null} if null string input
     * @since 3.2
     */
    public static final String escapeJson(final String input) {
        return ESCAPE_JSON.translate(input);
    }

    /**
     * Escapes the characters in a {@link String} using XML entities.
     *
     * <p>For example: {@code "bread" & "butter"} =&gt;
     * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
     * </p>
     *
     * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
     * Does not support DTDs or external entities.</p>
     *
     * <p>Note that Unicode characters greater than 0x7f are as of 3.0, no longer
     *    escaped. If you still wish this functionality, you can achieve it
     *    via the following:
     * {@code StringEscapeUtils.ESCAPE_XML.with( NumericEntityEscaper.between(0x7f, Integer.MAX_VALUE));}</p>
     *
     * @param input  the {@link String} to escape, may be null
     * @return a new escaped {@link String}, {@code null} if null string input
     * @see #unescapeXml(String)
     * @deprecated Use {@link #escapeXml10(java.lang.String)} or {@link #escapeXml11(java.lang.String)} instead.
     */
    @Deprecated
    public static final String escapeXml(final String input) {
        return ESCAPE_XML.translate(input);
    }

    /**
     * Escapes the characters in a {@link String} using XML entities.
     * <p>
     * For example:
     * </p>
     *
     * <pre>{@code
     * "bread" & "butter"
     * }</pre>
     * <p>
     * converts to:
     * </p>
     *
     * <pre>
     * {@code
     * &quot;bread&quot; &amp; &quot;butter&quot;
     * }
     * </pre>
     *
     * <p>
     * Note that XML 1.0 is a text-only format: it cannot represent control characters or unpaired Unicode surrogate code points, even after escaping. The
     * method {@code escapeXml10} will remove characters that do not fit in the following ranges:
     * </p>
     *
     * <p>
     * {@code #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}
     * </p>
     *
     * <p>
     * Though not strictly necessary, {@code escapeXml10} will escape characters in the following ranges:
     * </p>
     *
     * <p>
     * {@code [#x7F-#x84] | [#x86-#x9F]}
     * </p>
     *
     * <p>
     * The returned string can be inserted into a valid XML 1.0 or XML 1.1 document. If you want to allow more non-text characters in an XML 1.1 document, use
     * {@link #escapeXml11(String)}.
     * </p>
     *
     * @param input the {@link String} to escape, may be null
     * @return a new escaped {@link String}, {@code null} if null string input
     * @see #unescapeXml(String)
     * @since 3.3
     */
    public static String escapeXml10(final String input) {
        return ESCAPE_XML10.translate(input);
    }

    /**
     * Escapes the characters in a {@link String} using XML entities.
     *
     * <p>For example: {@code "bread" & "butter"} =&gt;
     * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
     * </p>
     *
     * <p>XML 1.1 can represent certain control characters, but it cannot represent
     * the null byte or unpaired Unicode surrogate code points, even after escaping.
     * {@code escapeXml11} will remove characters that do not fit in the following
     * ranges:</p>
     *
     * <p>{@code [#x1-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p>
     *
     * <p>{@code escapeXml11} will escape characters in the following ranges:</p>
     *
     * <p>{@code [#x1-#x8] | [#xB-#xC] | [#xE-#x1F] | [#x7F-#x84] | [#x86-#x9F]}</p>
     *
     * <p>The returned string can be inserted into a valid XML 1.1 document. Do not
     * use it for XML 1.0 documents.</p>
     *
     * @param input  the {@link String} to escape, may be null
     * @return a new escaped {@link String}, {@code null} if null string input
     * @see #unescapeXml(String)
     * @since 3.3
     */
    public static String escapeXml11(final String input) {
        return ESCAPE_XML11.translate(input);
    }

    /**
     * Returns a {@link String} value for an unescaped CSV column.
     *
     * <p>If the value is enclosed in double quotes, and contains a comma, newline
     *    or double quote, then quotes are removed.
     * </p>
     *
     * <p>Any double quote escaped characters (a pair of double quotes) are unescaped
     *    to just one double quote.</p>
     *
     * <p>If the value is not enclosed in double quotes, or is and does not contain a
     *    comma, newline or double quote, then the String value is returned unchanged.
     *    A value consisting of a single double quote is not considered enclosed and is
     *    also returned unchanged.</p>
     *
     * see <a href="https://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
     * <a href="https://datatracker.ietf.org/doc/html/rfc4180">RFC 4180</a>.
     *
     * @param input the input CSV column String, may be null
     * @return the input String, with enclosing double quotes removed and embedded double
     * quotes unescaped, {@code null} if null string input
     * @since 2.4
     */
    public static final String unescapeCsv(final String input) {
        return UNESCAPE_CSV.translate(input);
    }

    /**
     * Unescapes any EcmaScript literals found in the {@link String}.
     *
     * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'}
     * into a newline character, unless the {@code '\'} is preceded by another
     * {@code '\'}.</p>
     *
     * @see #unescapeJava(String)
     * @param input  the {@link String} to unescape, may be null
     * @return A new unescaped {@link String}, {@code null} if null string input
     * @since 3.0
     */
    public static final String unescapeEcmaScript(final String input) {
        return UNESCAPE_ECMASCRIPT.translate(input);
    }

    /**
     * Unescapes a string containing entity escapes to a string
     * containing the actual Unicode characters corresponding to the
     * escapes. Supports only HTML 3.0 entities.
     *
     * @param input  the {@link String} to unescape, may be null
     * @return a new unescaped {@link String}, {@code null} if null string input
     * @since 3.0
     */
    public static final String unescapeHtml3(final String input) {
        return UNESCAPE_HTML3.translate(input);
    }

    /**
     * Unescapes a string containing entity escapes to a string
     * containing the actual Unicode characters corresponding to the
     * escapes. Supports HTML 4.0 entities.
     *
     * <p>For example, the string {@code "&lt;Fran&ccedil;ais&gt;"}
     * will become {@code "<Français>"}</p>
     *
     * <p>If an entity is unrecognized, it is left alone, and inserted
     * verbatim into the result string. e.g. {@code "&gt;&zzzz;x"} will
     * become {@code ">&zzzz;x"}.</p>
     *
     * @param input  the {@link String} to unescape, may be null
     * @return a new unescaped {@link String}, {@code null} if null string input
     * @since 3.0
     */
    public static final String unescapeHtml4(final String input) {
        return UNESCAPE_HTML4.translate(input);
    }

    /**
     * Unescapes any Java literals found in the {@link String}.
     * For example, it will turn a sequence of {@code '\'} and
     * {@code 'n'} into a newline character, unless the {@code '\'}
     * is preceded by another {@code '\'}.
     *
     * @param input  the {@link String} to unescape, may be null
     * @return a new unescaped {@link String}, {@code null} if null string input
     */
    public static final String unescapeJava(final String input) {
        return UNESCAPE_JAVA.translate(input);
    }

    /**
     * Unescapes any Json literals found in the {@link String}.
     *
     * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'}
     * into a newline character, unless the {@code '\'} is preceded by another
     * {@code '\'}.</p>
     *
     * @see #unescapeJava(String)
     * @param input  the {@link String} to unescape, may be null
     * @return A new unescaped {@link String}, {@code null} if null string input
     * @since 3.2
     */
    public static final String unescapeJson(final String input) {
        return UNESCAPE_JSON.translate(input);
    }

    /**
     * Unescapes a string containing XML entity escapes to a string
     * containing the actual Unicode characters corresponding to the
     * escapes.
     *
     * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
     * Does not support DTDs or external entities.</p>
     *
     * <p>Note that numerical \\u Unicode codes are unescaped to their respective
     *    Unicode characters. This may change in future releases.</p>
     *
     * @param input  the {@link String} to unescape, may be null
     * @return a new unescaped {@link String}, {@code null} if null string input
     * @see #escapeXml(String)
     * @see #escapeXml10(String)
     * @see #escapeXml11(String)
     */
    public static final String unescapeXml(final String input) {
        return UNESCAPE_XML.translate(input);
    }

    /**
     * {@link StringEscapeUtils} instances should NOT be constructed in
     * standard programming.
     *
     * <p>Instead, the class should be used as:</p>
     * <pre>StringEscapeUtils.escapeJava("foo");</pre>
     *
     * <p>This constructor is public to permit tools that require a JavaBean
     * instance to operate.</p>
     *
     * @deprecated TODO Make private in 4.0.
     */
    @Deprecated
    public StringEscapeUtils() {
        // empty
    }

}