/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.lang3;

import java.io.IOException;
import java.io.Writer;

import org.apache.commons.lang3.text.translate.AggregateTranslator;
import org.apache.commons.lang3.text.translate.CharSequenceTranslator;
import org.apache.commons.lang3.text.translate.EntityArrays;
import org.apache.commons.lang3.text.translate.JavaUnicodeEscaper;
import org.apache.commons.lang3.text.translate.LookupTranslator;
import org.apache.commons.lang3.text.translate.NumericEntityEscaper;
import org.apache.commons.lang3.text.translate.NumericEntityUnescaper;
import org.apache.commons.lang3.text.translate.OctalUnescaper;
import org.apache.commons.lang3.text.translate.UnicodeUnescaper;
import org.apache.commons.lang3.text.translate.UnicodeUnpairedSurrogateRemover;

/**
 * Escapes and unescapes {@link String}s for
 * Java, JavaScript, HTML and XML.
 *
 * <p>#ThreadSafe#</p>
 * @since 2.0
 * @deprecated As of 3.6, use Apache Commons Text
 * <a href="https://commons.apache.org/proper/commons-text/javadocs/api-release/org/apache/commons/text/StringEscapeUtils.html">
 * StringEscapeUtils</a> instead
 */
@Deprecated
public class StringEscapeUtils {

    /* ESCAPE TRANSLATORS */

    // TODO: Create a parent class - 'SinglePassTranslator' ?
    //       It would handle the index checking + length returning,
    //       and could also have an optimization check method.
    /**
     * Translator that escapes a single CSV column value.
     *
     * <p>The whole input is handled in one call: the value is wrapped in double quotes
     * (with embedded double quotes doubled) when it contains a comma, a double quote,
     * a carriage return or a line feed, and is written unchanged otherwise.</p>
     */
    static class CsvEscaper extends CharSequenceTranslator {

        private static final char CSV_DELIMITER = ',';
        private static final char CSV_QUOTE = '"';
        private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE);
        /** An embedded quote is represented by two consecutive quotes. */
        private static final String CSV_ESCAPED_QUOTE_STR = CSV_QUOTE_STR + CSV_QUOTE_STR;
        private static final char[] CSV_SEARCH_CHARS = { CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF };

        /**
         * Escapes the complete {@code input} and writes the result to {@code out}.
         *
         * @param input the CSV column value to escape
         * @param index the position in {@code input}; must be {@code 0} because the
         *              whole input is consumed by a single call
         * @param out the writer receiving the escaped value
         * @return the number of code points in {@code input}, i.e. everything is consumed
         * @throws IOException if writing to {@code out} fails
         * @throws IllegalStateException if {@code index} is not {@code 0}
         */
        @Override
        public int translate(final CharSequence input, final int index, final Writer out) throws IOException {

            if (index != 0) {
                throw new IllegalStateException("CsvEscaper should never reach the [1] index");
            }

            final String text = input.toString();
            if (StringUtils.containsNone(text, CSV_SEARCH_CHARS)) {
                out.write(text);
            } else {
                out.write(CSV_QUOTE);
                out.write(StringUtils.replace(text, CSV_QUOTE_STR, CSV_ESCAPED_QUOTE_STR));
                out.write(CSV_QUOTE);
            }
            return Character.codePointCount(input, 0, input.length());
        }
    }

    /**
     * Translator that unescapes a single CSV column value.
     *
     * <p>The whole input is handled in one call. A value that is enclosed in double quotes
     * and contains a comma, a double quote, a carriage return or a line feed has its
     * enclosing quotes removed and doubled quotes collapsed; any other value is written
     * unchanged.</p>
     */
    static class CsvUnescaper extends CharSequenceTranslator {

        private static final char CSV_DELIMITER = ',';
        private static final char CSV_QUOTE = '"';
        private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE);
        /** An embedded quote is represented by two consecutive quotes. */
        private static final String CSV_ESCAPED_QUOTE_STR = CSV_QUOTE_STR + CSV_QUOTE_STR;
        private static final char[] CSV_SEARCH_CHARS = {CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF};

        /**
         * Unescapes the complete {@code input} and writes the result to {@code out}.
         *
         * @param input the CSV column value to unescape
         * @param index the position in {@code input}; must be {@code 0} because the
         *              whole input is consumed by a single call
         * @param out the writer receiving the unescaped value
         * @return the number of code points in {@code input}, i.e. everything is consumed
         * @throws IOException if writing to {@code out} fails
         * @throws IllegalStateException if {@code index} is not {@code 0}
         */
        @Override
        public int translate(final CharSequence input, final int index, final Writer out) throws IOException {

            if (index != 0) {
                throw new IllegalStateException("CsvUnescaper should never reach the [1] index");
            }

            final int length = input.length();

            // A quoted value needs a distinct opening and closing quote. Without the length
            // guard a lone '"' would match both tests and subSequence(1, 0) would throw,
            // and an empty input would fail on charAt(0).
            if (length < 2 || input.charAt(0) != CSV_QUOTE || input.charAt(length - 1) != CSV_QUOTE) {
                out.write(input.toString());
                return Character.codePointCount(input, 0, length);
            }

            // strip quotes
            final String quoteless = input.subSequence(1, length - 1).toString();

            if (StringUtils.containsAny(quoteless, CSV_SEARCH_CHARS)) {
                // deal with escaped quotes; ie) ""
                out.write(StringUtils.replace(quoteless, CSV_ESCAPED_QUOTE_STR, CSV_QUOTE_STR));
            } else {
                // quotes were not required, so the value is returned unchanged (quotes kept)
                out.write(input.toString());
            }
            return Character.codePointCount(input, 0, length);
        }
    }

    /**
     * Translator object for escaping Java.
     *
     * While {@link #escapeJava(String)} is the expected method of use, this
     * object allows the Java escaping functionality to be used
     * as the foundation for a custom translator.
     *
     * @since 3.0
     */
    public static final CharSequenceTranslator ESCAPE_JAVA =
          new LookupTranslator(
            new String[][] {
              {"\"", "\\\""},
              {"\\", "\\\\"},
          }).with(
            new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE())
          ).with(
            JavaUnicodeEscaper.outsideOf(32, 0x7f)
        );

    /**
     * Translator object for escaping EcmaScript/JavaScript.
     *
     * While {@link #escapeEcmaScript(String)} is the expected method of use, this
     * object allows the EcmaScript escaping functionality to be used
     * as the foundation for a custom translator.
     *
     * @since 3.0
     */
    public static final CharSequenceTranslator ESCAPE_ECMASCRIPT =
        new AggregateTranslator(
            new LookupTranslator(
                      new String[][] {
                            {"'", "\\'"},
                            {"\"", "\\\""},
                            {"\\", "\\\\"},
                            {"/", "\\/"}
                      }),
            new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()),
            JavaUnicodeEscaper.outsideOf(32, 0x7f)
        );

    /**
     * Translator object for escaping Json.
     *
     * While {@link #escapeJson(String)} is the expected method of use, this
     * object allows the Json escaping functionality to be used
     * as the foundation for a custom translator.
     *
     * @since 3.2
     */
    public static final CharSequenceTranslator ESCAPE_JSON =
        new AggregateTranslator(
            new LookupTranslator(
                      new String[][] {
                            {"\"", "\\\""},
                            {"\\", "\\\\"},
                            {"/", "\\/"}
                      }),
            new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()),
            JavaUnicodeEscaper.outsideOf(32, 0x7f)
        );

    /**
     * Translator object for escaping XML.
     *
     * While {@link #escapeXml(String)} is the expected method of use, this
     * object allows the XML escaping functionality to be used
     * as the foundation for a custom translator.
     *
     * @since 3.0
     * @deprecated use {@link #ESCAPE_XML10} or {@link #ESCAPE_XML11} instead.
     */
    @Deprecated
    public static final CharSequenceTranslator ESCAPE_XML =
        new AggregateTranslator(
            new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
            new LookupTranslator(EntityArrays.APOS_ESCAPE())
        );

    /**
     * Translator object for escaping XML 1.0.
     *
     * While {@link #escapeXml10(String)} is the expected method of use, this
     * object allows the XML escaping functionality to be used
     * as the foundation for a custom translator.
     *
     * @since 3.3
     */
    public static final CharSequenceTranslator ESCAPE_XML10 =
        new AggregateTranslator(
            new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
            new LookupTranslator(EntityArrays.APOS_ESCAPE()),
            new LookupTranslator(
                    new String[][] {
                            { "\u0000", StringUtils.EMPTY },
                            { "\u0001", StringUtils.EMPTY },
                            { "\u0002", StringUtils.EMPTY },
                            { "\u0003", StringUtils.EMPTY },
                            { "\u0004", StringUtils.EMPTY },
                            { "\u0005", StringUtils.EMPTY },
                            { "\u0006", StringUtils.EMPTY },
                            { "\u0007", StringUtils.EMPTY },
                            { "\u0008", StringUtils.EMPTY },
                            { "\u000b", StringUtils.EMPTY },
                            { "\u000c", StringUtils.EMPTY },
                            { "\u000e", StringUtils.EMPTY },
                            { "\u000f", StringUtils.EMPTY },
                            { "\u0010", StringUtils.EMPTY },
                            { "\u0011", StringUtils.EMPTY },
                            { "\u0012", StringUtils.EMPTY },
                            { "\u0013", StringUtils.EMPTY },
                            { "\u0014", StringUtils.EMPTY },
                            { "\u0015", StringUtils.EMPTY },
                            { "\u0016", StringUtils.EMPTY },
                            { "\u0017", StringUtils.EMPTY },
                            { "\u0018", StringUtils.EMPTY },
                            { "\u0019", StringUtils.EMPTY },
                            { "\u001a", StringUtils.EMPTY },
                            { "\u001b", StringUtils.EMPTY },
                            { "\u001c", StringUtils.EMPTY },
                            { "\u001d", StringUtils.EMPTY },
                            { "\u001e", StringUtils.EMPTY },
                            { "\u001f", StringUtils.EMPTY },
                            { "\ufffe", StringUtils.EMPTY },
                            { "\uffff", StringUtils.EMPTY }
                    }),
            NumericEntityEscaper.between(0x7f, 0x84),
            NumericEntityEscaper.between(0x86, 0x9f),
            new UnicodeUnpairedSurrogateRemover()
        );

    /**
     * Translator object for escaping XML 1.1.
     *
     * While {@link #escapeXml11(String)} is the expected method of use, this
     * object allows the XML escaping functionality to be used
     * as the foundation for a custom translator.
     *
     * @since 3.3
     */
    public static final CharSequenceTranslator ESCAPE_XML11 =
        new AggregateTranslator(
            new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
            new LookupTranslator(EntityArrays.APOS_ESCAPE()),
            new LookupTranslator(
                    new String[][] {
                            { "\u0000", StringUtils.EMPTY },
                            // #xB and #xC are representable in XML 1.1, but only as
                            // character references, so they are escaped rather than removed.
                            { "\u000b", "&#11;" },
                            { "\u000c", "&#12;" },
                            { "\ufffe", StringUtils.EMPTY },
                            { "\uffff", StringUtils.EMPTY }
                    }),
            NumericEntityEscaper.between(0x1, 0x8),
            NumericEntityEscaper.between(0xe, 0x1f),
            NumericEntityEscaper.between(0x7f, 0x84),
            NumericEntityEscaper.between(0x86, 0x9f),
            new UnicodeUnpairedSurrogateRemover()
        );

    /**
     * Translator object for escaping HTML version 3.0.
     *
     * While {@link #escapeHtml3(String)} is the expected method of use, this
     * object allows the HTML escaping functionality to be used
     * as the foundation for a custom translator.
     *
     * @since 3.0
     */
    public static final CharSequenceTranslator ESCAPE_HTML3 =
        new AggregateTranslator(
            new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
            new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE())
        );

    /**
     * Translator object for escaping HTML version 4.0.
     *
     * While {@link #escapeHtml4(String)} is the expected method of use, this
     * object allows the HTML escaping functionality to be used
     * as the foundation for a custom translator.
     *
     * @since 3.0
     */
    public static final CharSequenceTranslator ESCAPE_HTML4 =
        new AggregateTranslator(
            new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
            new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE()),
            new LookupTranslator(EntityArrays.HTML40_EXTENDED_ESCAPE())
        );

    /**
     * Translator object for escaping individual Comma Separated Values.
     *
     * While {@link #escapeCsv(String)} is the expected method of use, this
     * object allows the CSV escaping functionality to be used
     * as the foundation for a custom translator.
     *
     * @since 3.0
     */
    public static final CharSequenceTranslator ESCAPE_CSV = new CsvEscaper();

    /* UNESCAPE TRANSLATORS */

    /**
     * Translator object for unescaping escaped Java.
     *
     * While {@link #unescapeJava(String)} is the expected method of use, this
     * object allows the Java unescaping functionality to be used
     * as the foundation for a custom translator.
     *
     * @since 3.0
     */
    // TODO: throw "illegal character: \92" as an Exception if a \ on the end of the Java (as per the compiler)?
    public static final CharSequenceTranslator UNESCAPE_JAVA =
        new AggregateTranslator(
            new OctalUnescaper(),     // .between('\1', '\377'),
            new UnicodeUnescaper(),
            new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_UNESCAPE()),
            new LookupTranslator(
                      new String[][] {
                            {"\\\\", "\\"},
                            {"\\\"", "\""},
                            {"\\'", "'"},
                            {"\\", ""}
                      })
        );

    /**
     * Translator object for unescaping escaped EcmaScript.
     *
     * While {@link #unescapeEcmaScript(String)} is the expected method of use, this
     * object allows the EcmaScript unescaping functionality to be used
     * as the foundation for a custom translator.
     *
     * @since 3.0
     */
    public static final CharSequenceTranslator UNESCAPE_ECMASCRIPT = UNESCAPE_JAVA;

    /**
     * Translator object for unescaping escaped Json.
     *
     * While {@link #unescapeJson(String)} is the expected method of use, this
     * object allows the Json unescaping functionality to be used
     * as the foundation for a custom translator.
     *
     * @since 3.2
     */
    public static final CharSequenceTranslator UNESCAPE_JSON = UNESCAPE_JAVA;

    /**
     * Translator object for unescaping escaped HTML 3.0.
     *
     * While {@link #unescapeHtml3(String)} is the expected method of use, this
     * object allows the HTML unescaping functionality to be used
     * as the foundation for a custom translator.
     *
     * @since 3.0
     */
    public static final CharSequenceTranslator UNESCAPE_HTML3 =
        new AggregateTranslator(
            new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
            new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()),
            new NumericEntityUnescaper()
        );

    /**
     * Translator object for unescaping escaped HTML 4.0.
     *
     * While {@link #unescapeHtml4(String)} is the expected method of use, this
     * object allows the HTML unescaping functionality to be used
     * as the foundation for a custom translator.
     *
     * @since 3.0
     */
    public static final CharSequenceTranslator UNESCAPE_HTML4 =
        new AggregateTranslator(
            new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
            new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()),
            new LookupTranslator(EntityArrays.HTML40_EXTENDED_UNESCAPE()),
            new NumericEntityUnescaper()
        );

    /**
     * Translator object for unescaping escaped XML.
     *
     * While {@link #unescapeXml(String)} is the expected method of use, this
     * object allows the XML unescaping functionality to be used
     * as the foundation for a custom translator.
     *
     * @since 3.0
     */
    public static final CharSequenceTranslator UNESCAPE_XML =
        new AggregateTranslator(
            new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
            new LookupTranslator(EntityArrays.APOS_UNESCAPE()),
            new NumericEntityUnescaper()
        );

    /**
     * Translator object for unescaping escaped Comma Separated Value entries.
     *
     * While {@link #unescapeCsv(String)} is the expected method of use, this
     * object allows the CSV unescaping functionality to be used
     * as the foundation for a custom translator.
     *
     * @since 3.0
     */
    public static final CharSequenceTranslator UNESCAPE_CSV = new CsvUnescaper();

    /* Helper functions */

    /**
     * Returns a {@link String} value for a CSV column enclosed in double quotes,
     * if required.
     *
     * <p>If the value contains a comma, newline or double quote, then the
     *    String value is returned enclosed in double quotes.</p>
     *
     * <p>Any double quote characters in the value are escaped with another double quote.</p>
     *
     * <p>If the value does not contain a comma, newline or double quote, then the
     *    String value is returned unchanged.</p>
     *
     * see <a href="https://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
     * <a href="https://datatracker.ietf.org/doc/html/rfc4180">RFC 4180</a>.
     *
     * @param input the input CSV column String, may be null
     * @return the input String, enclosed in double quotes if the value contains a comma,
     * newline or double quote, {@code null} if null string input
     * @since 2.4
     */
    public static final String escapeCsv(final String input) {
        return ESCAPE_CSV.translate(input);
    }

    /**
     * Escapes the characters in a {@link String} using EcmaScript String rules.
     * <p>Escapes any values it finds into their EcmaScript String form.
     * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
     *
     * <p>So a tab becomes the characters {@code '\\'} and
     * {@code 't'}.</p>
     *
     * <p>The only difference between Java strings and EcmaScript strings
     * is that in EcmaScript, a single quote and forward-slash (/) are escaped.</p>
     *
     * <p>Note that EcmaScript is best known by the JavaScript and ActionScript dialects.</p>
     *
     * <p>Example:</p>
     * <pre>
     * input string: He didn't say, "Stop!"
     * output string: He didn\'t say, \"Stop!\"
     * </pre>
     *
     * @param input  String to escape values in, may be null
     * @return String with escaped values, {@code null} if null string input
     *
     * @since 3.0
     */
    public static final String escapeEcmaScript(final String input) {
        return ESCAPE_ECMASCRIPT.translate(input);
    }

    /**
     * Escapes the characters in a {@link String} using HTML entities.
     * <p>Supports only the HTML 3.0 entities.</p>
     *
     * @param input  the {@link String} to escape, may be null
     * @return a new escaped {@link String}, {@code null} if null string input
     *
     * @since 3.0
     */
    public static final String escapeHtml3(final String input) {
        return ESCAPE_HTML3.translate(input);
    }

    /**
     * Escapes the characters in a {@link String} using HTML entities.
     *
     * <p>
     * For example:
     * </p>
     * <p>{@code "bread" & "butter"}</p>
     * becomes:
     * <p>
     * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
     * </p>
     *
     * <p>Supports all known HTML 4.0 entities, including funky accents.
     * Note that the commonly used apostrophe escape character (&amp;apos;)
     * is not a legal entity and so is not supported).</p>
     *
     * @param input  the {@link String} to escape, may be null
     * @return a new escaped {@link String}, {@code null} if null string input
     *
     * @see <a href="https://web.archive.org/web/20060225074150/https://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a>
     * @see <a href="https://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a>
     * @see <a href="https://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a>
     * @see <a href="https://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a>
     * @see <a href="https://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a>
     *
     * @since 3.0
     */
    public static final String escapeHtml4(final String input) {
        return ESCAPE_HTML4.translate(input);
    }

    /**
     * Escapes the characters in a {@link String} using Java String rules.
     *
     * <p>Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
     *
     * <p>So a tab becomes the characters {@code '\\'} and
     * {@code 't'}.</p>
     *
     * <p>The only difference between Java strings and JavaScript strings
     * is that in JavaScript, a single quote and forward-slash (/) are escaped.</p>
     *
     * <p>Example:</p>
     * <pre>
     * input string: He didn't say, "Stop!"
     * output string: He didn't say, \"Stop!\"
     * </pre>
     *
     * @param input  String to escape values in, may be null
     * @return String with escaped values, {@code null} if null string input
     */
    public static final String escapeJava(final String input) {
        return ESCAPE_JAVA.translate(input);
    }

    /**
     * Escapes the characters in a {@link String} using Json String rules.
     * <p>Escapes any values it finds into their Json String form.
     * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
     *
     * <p>So a tab becomes the characters {@code '\\'} and
     * {@code 't'}.</p>
     *
     * <p>The only difference between Java strings and Json strings
     * is that in Json, forward-slash (/) is escaped.</p>
     *
     * <p>See https://www.ietf.org/rfc/rfc4627.txt for further details.</p>
     *
     * <p>Example:</p>
     * <pre>
     * input string: He didn't say, "Stop!"
     * output string: He didn't say, \"Stop!\"
     * </pre>
     *
     * @param input  String to escape values in, may be null
     * @return String with escaped values, {@code null} if null string input
     *
     * @since 3.2
     */
    public static final String escapeJson(final String input) {
        return ESCAPE_JSON.translate(input);
    }

    /**
     * Escapes the characters in a {@link String} using XML entities.
     *
     * <p>For example: {@code "bread" & "butter"} =&gt;
     * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
     * </p>
     *
     * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
     * Does not support DTDs or external entities.</p>
     *
     * <p>Note that Unicode characters greater than 0x7f are as of 3.0, no longer
     *    escaped. If you still wish this functionality, you can achieve it
     *    via the following:
     * {@code StringEscapeUtils.ESCAPE_XML.with( NumericEntityEscaper.between(0x7f, Integer.MAX_VALUE) );}</p>
     *
     * @param input  the {@link String} to escape, may be null
     * @return a new escaped {@link String}, {@code null} if null string input
     * @see #unescapeXml(String)
     * @deprecated use {@link #escapeXml10(java.lang.String)} or {@link #escapeXml11(java.lang.String)} instead.
     */
    @Deprecated
    public static final String escapeXml(final String input) {
        return ESCAPE_XML.translate(input);
    }

    /**
     * Escapes the characters in a {@link String} using XML entities.
     *
     * <p>For example: {@code "bread" & "butter"} =&gt;
     * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
     * </p>
     *
     * <p>Note that XML 1.0 is a text-only format: it cannot represent control
     * characters or unpaired Unicode surrogate code points, even after escaping.
     * {@code escapeXml10} will remove characters that do not fit in the
     * following ranges:</p>
     *
     * <p>{@code #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p>
     *
     * <p>Though not strictly necessary, {@code escapeXml10} will escape
     * characters in the following ranges:</p>
     *
     * <p>{@code [#x7F-#x84] | [#x86-#x9F]}</p>
     *
     * <p>The returned string can be inserted into a valid XML 1.0 or XML 1.1
     * document. If you want to allow more non-text characters in an XML 1.1
     * document, use {@link #escapeXml11(String)}.</p>
     *
     * @param input  the {@link String} to escape, may be null
     * @return a new escaped {@link String}, {@code null} if null string input
     * @see #unescapeXml(String)
     * @since 3.3
     */
    public static String escapeXml10(final String input) {
        return ESCAPE_XML10.translate(input);
    }

    /**
     * Escapes the characters in a {@link String} using XML entities.
     *
     * <p>For example: {@code "bread" & "butter"} =&gt;
     * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
     * </p>
     *
     * <p>XML 1.1 can represent certain control characters, but it cannot represent
     * the null byte or unpaired Unicode surrogate code points, even after escaping.
     * {@code escapeXml11} will remove characters that do not fit in the following
     * ranges:</p>
     *
     * <p>{@code [#x1-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p>
     *
     * <p>{@code escapeXml11} will escape characters in the following ranges:</p>
     *
     * <p>{@code [#x1-#x8] | [#xB-#xC] | [#xE-#x1F] | [#x7F-#x84] | [#x86-#x9F]}</p>
     *
     * <p>The returned string can be inserted into a valid XML 1.1 document. Do not
     * use it for XML 1.0 documents.</p>
     *
     * @param input  the {@link String} to escape, may be null
     * @return a new escaped {@link String}, {@code null} if null string input
     * @see #unescapeXml(String)
     * @since 3.3
     */
    public static String escapeXml11(final String input) {
        return ESCAPE_XML11.translate(input);
    }

    /**
     * Returns a {@link String} value for an unescaped CSV column.
     *
     * <p>If the value is enclosed in double quotes, and contains a comma, newline
     *    or double quote, then quotes are removed.
     * </p>
     *
     * <p>Any double quote escaped characters (a pair of double quotes) are unescaped
     *    to just one double quote.</p>
     *
     * <p>If the value is not enclosed in double quotes, or is and does not contain a
     *    comma, newline or double quote, then the String value is returned unchanged.
     *    A value consisting of a single double quote is not considered enclosed and is
     *    returned unchanged.</p>
     *
     * see <a href="https://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
     * <a href="https://datatracker.ietf.org/doc/html/rfc4180">RFC 4180</a>.
     *
     * @param input the input CSV column String, may be null
     * @return the input String, with enclosing double quotes removed and embedded double
     * quotes unescaped, {@code null} if null string input
     * @since 2.4
     */
    public static final String unescapeCsv(final String input) {
        return UNESCAPE_CSV.translate(input);
    }

    /**
     * Unescapes any EcmaScript literals found in the {@link String}.
     *
     * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'}
     * into a newline character, unless the {@code '\'} is preceded by another
     * {@code '\'}.</p>
     *
     * @see #unescapeJava(String)
     * @param input  the {@link String} to unescape, may be null
     * @return A new unescaped {@link String}, {@code null} if null string input
     *
     * @since 3.0
     */
    public static final String unescapeEcmaScript(final String input) {
        return UNESCAPE_ECMASCRIPT.translate(input);
    }

    /**
     * Unescapes a string containing entity escapes to a string
     * containing the actual Unicode characters corresponding to the
     * escapes. Supports only HTML 3.0 entities.
     *
     * @param input  the {@link String} to unescape, may be null
     * @return a new unescaped {@link String}, {@code null} if null string input
     *
     * @since 3.0
     */
    public static final String unescapeHtml3(final String input) {
        return UNESCAPE_HTML3.translate(input);
    }

    /**
     * Unescapes a string containing entity escapes to a string
     * containing the actual Unicode characters corresponding to the
     * escapes. Supports HTML 4.0 entities.
     *
     * <p>For example, the string {@code "&lt;Fran&ccedil;ais&gt;"}
     * will become {@code "<Français>"}</p>
     *
     * <p>If an entity is unrecognized, it is left alone, and inserted
     * verbatim into the result string. e.g. {@code "&gt;&zzzz;x"} will
     * become {@code ">&zzzz;x"}.</p>
     *
     * @param input  the {@link String} to unescape, may be null
     * @return a new unescaped {@link String}, {@code null} if null string input
     *
     * @since 3.0
     */
    public static final String unescapeHtml4(final String input) {
        return UNESCAPE_HTML4.translate(input);
    }

    /**
     * Unescapes any Java literals found in the {@link String}.
     * For example, it will turn a sequence of {@code '\'} and
     * {@code 'n'} into a newline character, unless the {@code '\'}
     * is preceded by another {@code '\'}.
     *
     * @param input  the {@link String} to unescape, may be null
     * @return a new unescaped {@link String}, {@code null} if null string input
     */
    public static final String unescapeJava(final String input) {
        return UNESCAPE_JAVA.translate(input);
    }

    /**
     * Unescapes any Json literals found in the {@link String}.
     *
     * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'}
     * into a newline character, unless the {@code '\'} is preceded by another
     * {@code '\'}.</p>
     *
     * @see #unescapeJava(String)
     * @param input  the {@link String} to unescape, may be null
     * @return A new unescaped {@link String}, {@code null} if null string input
     *
     * @since 3.2
     */
    public static final String unescapeJson(final String input) {
        return UNESCAPE_JSON.translate(input);
    }


    /**
     * Unescapes a string containing XML entity escapes to a string
     * containing the actual Unicode characters corresponding to the
     * escapes.
     *
     * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
     * Does not support DTDs or external entities.</p>
     *
     * <p>Note that numerical \\u Unicode codes are unescaped to their respective
     *    Unicode characters. This may change in future releases.</p>
     *
     * @param input  the {@link String} to unescape, may be null
     * @return a new unescaped {@link String}, {@code null} if null string input
     * @see #escapeXml(String)
     * @see #escapeXml10(String)
     * @see #escapeXml11(String)
     */
    public static final String unescapeXml(final String input) {
        return UNESCAPE_XML.translate(input);
    }

    /**
     * {@link StringEscapeUtils} instances should NOT be constructed in
     * standard programming.
     *
     * <p>Instead, the class should be used as:</p>
     * <pre>StringEscapeUtils.escapeJava("foo");</pre>
     *
     * <p>This constructor is public to permit tools that require a JavaBean
     * instance to operate.</p>
     */
    public StringEscapeUtils() {
    }

}