/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.text;

import java.io.IOException;
import java.io.Writer;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

import org.apache.commons.lang3.StringUtils;
import org.apache.commons.text.translate.AggregateTranslator;
import org.apache.commons.text.translate.CharSequenceTranslator;
import org.apache.commons.text.translate.CsvTranslators;
import org.apache.commons.text.translate.EntityArrays;
import org.apache.commons.text.translate.JavaUnicodeEscaper;
import org.apache.commons.text.translate.LookupTranslator;
import org.apache.commons.text.translate.NumericEntityEscaper;
import org.apache.commons.text.translate.NumericEntityUnescaper;
import org.apache.commons.text.translate.OctalUnescaper;
import org.apache.commons.text.translate.UnicodeUnescaper;
import org.apache.commons.text.translate.UnicodeUnpairedSurrogateRemover;

/**
 * <p>
 * Escapes and unescapes {@code String}s for Java, Java Script, JSON, HTML, XML, CSV and XSI (shell command language).
 * </p>
 *
 * <p>
 * Every public static method is a thin wrapper that delegates to one of the {@link CharSequenceTranslator} constants declared in this class; the constants
 * are exposed so they can be combined into custom translators.
 * </p>
 *
 * <p>
 * #ThreadSafe#
 * </p>
 *
 * <p>
 * This code has been adapted from Apache Commons Lang 3.5.
 * </p>
 *
 * @since 1.0
 */
public class StringEscapeUtils {

    /**
     * Convenience wrapper for {@link StringBuilder} providing escape methods.
     *
     * <p>
     * Instances are obtained from {@link StringEscapeUtils#builder(CharSequenceTranslator)}; the constructor is private.
     * </p>
     *
     * <p>Example:</p>
     * <pre>
     * StringEscapeUtils.builder(StringEscapeUtils.ESCAPE_HTML4)
     *      .append("&lt;p&gt;")
     *      .escape("This is paragraph 1 and special chars like &amp; get escaped.")
     *      .append("&lt;/p&gt;&lt;p&gt;")
     *      .escape("This is paragraph 2 &amp; more...")
     *      .append("&lt;/p&gt;")
     *      .toString()
     * </pre>
     */
    public static final class Builder {

        /**
         * StringBuilder to be used in the Builder class.
         */
        private final StringBuilder sb;

        /**
         * CharSequenceTranslator to be used in the Builder class.
         */
        private final CharSequenceTranslator translator;

        /**
         * Builder constructor.
         *
         * @param translator a CharSequenceTranslator.
         */
        private Builder(final CharSequenceTranslator translator) {
            this.sb = new StringBuilder();
            this.translator = translator;
        }

        /**
         * Literal append, no escaping being done.
         *
         * @param input the String to append
         * @return {@code this}, to enable chaining
         */
        public Builder append(final String input) {
            sb.append(input);
            return this;
        }

        /**
         * Escape {@code input} according to the given {@link CharSequenceTranslator}.
         *
         * @param input the String to escape
         * @return {@code this}, to enable chaining
         */
        public Builder escape(final String input) {
            sb.append(translator.translate(input));
            return this;
        }

        /**
         * Return the escaped string.
         *
         * @return The escaped string
         */
        @Override
        public String toString() {
            return sb.toString();
        }
    }

    /**
     * Translator object for unescaping backslash escaped entries.
     *
     * <p>
     * Each escaping backslash is dropped and the character that follows it is kept verbatim, so an escaped backslash yields a single backslash. A lone
     * backslash at the very end of the input is dropped.
     * </p>
     */
    static class XsiUnescaper extends CharSequenceTranslator {

        /**
         * Escaped backslash constant.
         */
        private static final char BACKSLASH = '\\';

        /**
         * Unescapes the whole input in a single call.
         *
         * @param input the text to unescape
         * @param index must be {@code 0}; the complete input is consumed by the first call
         * @param writer receives the unescaped text
         * @return the number of code points in {@code input}, i.e. everything has been consumed
         * @throws IOException if {@code writer} fails
         * @throws IllegalStateException if {@code index} is not {@code 0}
         */
        @Override
        public int translate(final CharSequence input, final int index, final Writer writer) throws IOException {

            if (index != 0) {
                throw new IllegalStateException("XsiUnescaper should never reach the [1] index");
            }

            final String s = input.toString();

            // Start of the pending run of literal characters that has not been written yet.
            int segmentStart = 0;
            // Where to look for the next escaping backslash.
            int searchOffset = 0;
            while (true) {
                final int pos = s.indexOf(BACKSLASH, searchOffset);
                if (pos == -1) {
                    // No more escapes: flush whatever is left.
                    if (segmentStart < s.length()) {
                        writer.write(s.substring(segmentStart));
                    }
                    break;
                }
                if (pos > segmentStart) {
                    writer.write(s.substring(segmentStart, pos));
                }
                // Drop the backslash itself; the escaped character becomes the start of the next literal run.
                segmentStart = pos + 1;
                // Skip the escaped character so that an escaped backslash is not treated as a new escape.
                searchOffset = pos + 2;
            }

            return Character.codePointCount(input, 0, input.length());
        }
    }

    /* ESCAPE TRANSLATORS */

    /**
     * Translator object for escaping Java.
     *
     * While {@link #escapeJava(String)} is the expected method of use, this object allows the Java escaping functionality to be used as the foundation for a
     * custom translator.
     */
    public static final CharSequenceTranslator ESCAPE_JAVA;
    static {
        final Map<CharSequence, CharSequence> escapeJavaMap = new HashMap<>();
        escapeJavaMap.put("\"", "\\\"");
        escapeJavaMap.put("\\", "\\\\");
        ESCAPE_JAVA = new AggregateTranslator(
                new LookupTranslator(Collections.unmodifiableMap(escapeJavaMap)),
                new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE),
                JavaUnicodeEscaper.outsideOf(32, 0x7f)
        );
    }

    /**
     * Translator object for escaping EcmaScript/JavaScript.
     *
     * While {@link #escapeEcmaScript(String)} is the expected method of use, this object allows the EcmaScript escaping functionality to be used as the
     * foundation for a custom translator.
     */
    public static final CharSequenceTranslator ESCAPE_ECMASCRIPT;
    static {
        final Map<CharSequence, CharSequence> escapeEcmaScriptMap = new HashMap<>();
        escapeEcmaScriptMap.put("'", "\\'");
        escapeEcmaScriptMap.put("\"", "\\\"");
        escapeEcmaScriptMap.put("\\", "\\\\");
        escapeEcmaScriptMap.put("/", "\\/");
        ESCAPE_ECMASCRIPT = new AggregateTranslator(
                new LookupTranslator(Collections.unmodifiableMap(escapeEcmaScriptMap)),
                new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE),
                JavaUnicodeEscaper.outsideOf(32, 0x7f)
        );
    }

    /**
     * Translator object for escaping Json.
     *
     * While {@link #escapeJson(String)} is the expected method of use, this object allows the Json escaping functionality to be used as the foundation for a
     * custom translator.
     */
    public static final CharSequenceTranslator ESCAPE_JSON;
    static {
        final Map<CharSequence, CharSequence> escapeJsonMap = new HashMap<>();
        escapeJsonMap.put("\"", "\\\"");
        escapeJsonMap.put("\\", "\\\\");
        escapeJsonMap.put("/", "\\/");
        ESCAPE_JSON = new AggregateTranslator(
                new LookupTranslator(Collections.unmodifiableMap(escapeJsonMap)),
                new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE),
                // Upper bound is 0x7e (not 0x7f as for Java/EcmaScript).
                JavaUnicodeEscaper.outsideOf(32, 0x7e)
        );
    }

    /**
     * Translator object for escaping XML 1.0.
     *
     * While {@link #escapeXml10(String)} is the expected method of use, this object allows the XML escaping functionality to be used as the foundation for a
     * custom translator.
     */
    public static final CharSequenceTranslator ESCAPE_XML10;
    static {
        final Map<CharSequence, CharSequence> escapeXml10Map = new HashMap<>();
        // XML 1.0 cannot represent the C0 control characters other than tab (0x9), line feed (0xA) and
        // carriage return (0xD), not even as character references, so they are removed.
        for (int codePoint = 0x0; codePoint <= 0x1f; codePoint++) {
            if (codePoint != 0x9 && codePoint != 0xa && codePoint != 0xd) {
                escapeXml10Map.put(String.valueOf((char) codePoint), StringUtils.EMPTY);
            }
        }
        // The two BMP non-characters are removed as well.
        escapeXml10Map.put("\ufffe", StringUtils.EMPTY);
        escapeXml10Map.put("\uffff", StringUtils.EMPTY);
        ESCAPE_XML10 = new AggregateTranslator(
                new LookupTranslator(EntityArrays.BASIC_ESCAPE),
                new LookupTranslator(EntityArrays.APOS_ESCAPE),
                new LookupTranslator(Collections.unmodifiableMap(escapeXml10Map)),
                NumericEntityEscaper.between(0x7f, 0x84),
                NumericEntityEscaper.between(0x86, 0x9f),
                new UnicodeUnpairedSurrogateRemover()
        );
    }

    /**
     * Translator object for escaping XML 1.1.
     *
     * While {@link #escapeXml11(String)} is the expected method of use, this
     * object allows the XML escaping functionality to be used
     * as the foundation for a custom translator.
     */
    public static final CharSequenceTranslator ESCAPE_XML11;

    static {
        final Map<CharSequence, CharSequence> escapeXml11Map = new HashMap<>();
        // The null character cannot appear in XML 1.1, not even as a character reference.
        escapeXml11Map.put("\u0000", StringUtils.EMPTY);
        // Vertical tab and form feed are restricted characters in XML 1.1: they are legal only as
        // numeric character references, so they are escaped rather than removed.
        escapeXml11Map.put("\u000b", "&#11;");
        escapeXml11Map.put("\u000c", "&#12;");
        escapeXml11Map.put("\ufffe", StringUtils.EMPTY);
        escapeXml11Map.put("\uffff", StringUtils.EMPTY);
        ESCAPE_XML11 = new AggregateTranslator(
                new LookupTranslator(EntityArrays.BASIC_ESCAPE),
                new LookupTranslator(EntityArrays.APOS_ESCAPE),
                new LookupTranslator(Collections.unmodifiableMap(escapeXml11Map)),
                NumericEntityEscaper.between(0x1, 0x8),
                NumericEntityEscaper.between(0xe, 0x1f),
                NumericEntityEscaper.between(0x7f, 0x84),
                NumericEntityEscaper.between(0x86, 0x9f),
                new UnicodeUnpairedSurrogateRemover()
        );
    }

    /**
     * Translator object for escaping HTML version 3.0.
     *
     * While {@link #escapeHtml3(String)} is the expected method of use, this
     * object allows the HTML escaping functionality to be used
     * as the foundation for a custom translator.
     */
    public static final CharSequenceTranslator ESCAPE_HTML3 =
            new AggregateTranslator(
                    new LookupTranslator(EntityArrays.BASIC_ESCAPE),
                    new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE)
            );

    /**
     * Translator object for escaping HTML version 4.0.
     *
     * While {@link #escapeHtml4(String)} is the expected method of use, this
     * object allows the HTML escaping functionality to be used
     * as the foundation for a custom translator.
     */
    public static final CharSequenceTranslator ESCAPE_HTML4 =
            new AggregateTranslator(
                    new LookupTranslator(EntityArrays.BASIC_ESCAPE),
                    new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE),
                    new LookupTranslator(EntityArrays.HTML40_EXTENDED_ESCAPE)
            );

    /**
     * Translator object for escaping individual Comma Separated Values.
     *
     * While {@link #escapeCsv(String)} is the expected method of use, this
     * object allows the CSV escaping functionality to be used
     * as the foundation for a custom translator.
     */
    public static final CharSequenceTranslator ESCAPE_CSV = new CsvTranslators.CsvEscaper();

    /**
     * Translator object for escaping Shell command language.
     *
     * While {@link #escapeXSI(String)} is the expected method of use, this
     * object allows the XSI escaping functionality to be used
     * as the foundation for a custom translator.
     *
     * @see <a href="https://pubs.opengroup.org/onlinepubs/7908799/xcu/chap2.html">Shell Command Language</a>
     */
    public static final CharSequenceTranslator ESCAPE_XSI;
    static {
        final Map<CharSequence, CharSequence> escapeXsiMap = new HashMap<>();
        escapeXsiMap.put("|", "\\|");
        escapeXsiMap.put("&", "\\&");
        escapeXsiMap.put(";", "\\;");
        escapeXsiMap.put("<", "\\<");
        escapeXsiMap.put(">", "\\>");
        escapeXsiMap.put("(", "\\(");
        escapeXsiMap.put(")", "\\)");
        escapeXsiMap.put("$", "\\$");
        escapeXsiMap.put("`", "\\`");
        escapeXsiMap.put("\\", "\\\\");
        escapeXsiMap.put("\"", "\\\"");
        escapeXsiMap.put("'", "\\'");
        escapeXsiMap.put(" ", "\\ ");
        escapeXsiMap.put("\t", "\\\t");
        // Line breaks are removed rather than escaped.
        escapeXsiMap.put("\r\n", StringUtils.EMPTY);
        escapeXsiMap.put("\n", StringUtils.EMPTY);
        escapeXsiMap.put("*", "\\*");
        escapeXsiMap.put("?", "\\?");
        escapeXsiMap.put("[", "\\[");
        escapeXsiMap.put("#", "\\#");
        escapeXsiMap.put("~", "\\~");
        escapeXsiMap.put("=", "\\=");
        escapeXsiMap.put("%", "\\%");
        ESCAPE_XSI = new LookupTranslator(
                Collections.unmodifiableMap(escapeXsiMap)
        );
    }

    /* UNESCAPE TRANSLATORS */

    /**
     * Translator object for unescaping escaped Java.
     *
     * While {@link #unescapeJava(String)} is the expected method of use, this
     * object allows the Java unescaping functionality to be used
     * as the foundation for a custom translator.
     */
    public static final CharSequenceTranslator UNESCAPE_JAVA;

    static {
        final Map<CharSequence, CharSequence> unescapeJavaMap = new HashMap<>();
        unescapeJavaMap.put("\\\\", "\\");
        unescapeJavaMap.put("\\\"", "\"");
        unescapeJavaMap.put("\\'", "'");
        // A backslash that starts no recognised escape is dropped.
        unescapeJavaMap.put("\\", StringUtils.EMPTY);
        UNESCAPE_JAVA = new AggregateTranslator(
                new OctalUnescaper(),     // .between('\1', '\377'),
                new UnicodeUnescaper(),
                new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_UNESCAPE),
                new LookupTranslator(Collections.unmodifiableMap(unescapeJavaMap))
        );
    }

    /**
     * Translator object for unescaping escaped EcmaScript.
     *
     * While {@link #unescapeEcmaScript(String)} is the expected method of use, this
     * object allows the EcmaScript unescaping functionality to be used
     * as the foundation for a custom translator.
     */
    public static final CharSequenceTranslator UNESCAPE_ECMASCRIPT = UNESCAPE_JAVA;

    /**
     * Translator object for unescaping escaped Json.
     *
     * While {@link #unescapeJson(String)} is the expected method of use, this
     * object allows the Json unescaping functionality to be used
     * as the foundation for a custom translator.
     */
    public static final CharSequenceTranslator UNESCAPE_JSON = UNESCAPE_JAVA;

    /**
     * Translator object for unescaping escaped HTML 3.0.
     *
     * While {@link #unescapeHtml3(String)} is the expected method of use, this
     * object allows the HTML unescaping functionality to be used
     * as the foundation for a custom translator.
     */
    public static final CharSequenceTranslator UNESCAPE_HTML3 =
            new AggregateTranslator(
                    new LookupTranslator(EntityArrays.BASIC_UNESCAPE),
                    new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE),
                    new NumericEntityUnescaper()
            );

    /**
     * Translator object for unescaping escaped HTML 4.0.
     *
     * While {@link #unescapeHtml4(String)} is the expected method of use, this
     * object allows the HTML unescaping functionality to be used
     * as the foundation for a custom translator.
     */
    public static final CharSequenceTranslator UNESCAPE_HTML4 =
            new AggregateTranslator(
                    new LookupTranslator(EntityArrays.BASIC_UNESCAPE),
                    new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE),
                    new LookupTranslator(EntityArrays.HTML40_EXTENDED_UNESCAPE),
                    new NumericEntityUnescaper()
            );

    /**
     * Translator object for unescaping escaped XML.
     *
     * While {@link #unescapeXml(String)} is the expected method of use, this
     * object allows the XML unescaping functionality to be used
     * as the foundation for a custom translator.
     */
    public static final CharSequenceTranslator UNESCAPE_XML =
            new AggregateTranslator(
                    new LookupTranslator(EntityArrays.BASIC_UNESCAPE),
                    new LookupTranslator(EntityArrays.APOS_UNESCAPE),
                    new NumericEntityUnescaper()
            );

    /**
     * Translator object for unescaping escaped Comma Separated Value entries.
     *
     * While {@link #unescapeCsv(String)} is the expected method of use, this
     * object allows the CSV unescaping functionality to be used
     * as the foundation for a custom translator.
     */
    public static final CharSequenceTranslator UNESCAPE_CSV = new CsvTranslators.CsvUnescaper();

    /**
     * Translator object for unescaping escaped XSI Value entries.
     *
     * While {@link #unescapeXSI(String)}  is the expected method of use, this
     * object allows the XSI unescaping functionality to be used
     * as the foundation for a custom translator.
     */
    public static final CharSequenceTranslator UNESCAPE_XSI = new XsiUnescaper();

    /* Helper functions */

    /**
     * Gets a {@link Builder}.
     *
     * @param translator the text translator.
     * @return {@link Builder}
     */
    public static StringEscapeUtils.Builder builder(final CharSequenceTranslator translator) {
        return new Builder(translator);
    }

    /**
     * Returns a {@code String} value for a CSV column enclosed in double quotes, if required.
     *
     * <p>
     * If the value contains a comma, newline or double quote, then the String value is returned enclosed in double quotes.
     * </p>
     *
     * <p>
     * Any double quote characters in the value are escaped with another double quote.
     * </p>
     *
     * <p>
     * If the value does not contain a comma, newline or double quote, then the String value is returned unchanged.
     * </p>
     * <p>
     * See <a href="https://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and <a href="https://tools.ietf.org/html/rfc4180">RFC 4180</a>.
     * </p>
     *
     * @param input the input CSV column String, may be null.
     * @return The input String, enclosed in double quotes if the value contains a comma, newline or double quote, {@code null} if null string input.
     */
    public static String escapeCsv(final String input) {
        return ESCAPE_CSV.translate(input);
    }

    /**
     * Escapes the characters in a {@code String} using EcmaScript String rules.
     *
     * <p>
     * Escapes any values it finds into their EcmaScript String form. Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.)
     * </p>
     *
     * <p>
     * So a tab becomes the characters {@code '\\'} and {@code 't'}.
     * </p>
     *
     * <p>
     * The only difference between Java strings and EcmaScript strings is that in EcmaScript, a single quote and forward-slash (/) are escaped.
     * </p>
     *
     * <p>
     * Note that EcmaScript is best known by the JavaScript and ActionScript dialects.
     * </p>
     *
     * <p>
     * Example:
     * </p>
     *
     * <pre>
     * input string: He didn't say, "Stop!"
     * output string: He didn\'t say, \"Stop!\"
     * </pre>
     * <p>
     * <strong>Security Note.</strong> We only provide backslash escaping in this method. For example, {@code '\"'} has the output {@code '\\\"'} which could
     * result in potential issues in the case where the string being escaped is being used in an HTML tag like {@code <select onmouseover="..." />}. If you wish
     * to have more rigorous string escaping, you may consider the <a href="https://www.owasp.org/index.php/Category:OWASP_Enterprise_Security_API_JAVA">ESAPI
     * Libraries</a>. Further, you can view the <a href="https://github.com/esapi">ESAPI GitHub Org</a>.
     * </p>
     *
     * @param input String to escape values in, may be null.
     * @return String with escaped values, {@code null} if null string input.
     */
    public static String escapeEcmaScript(final String input) {
        return ESCAPE_ECMASCRIPT.translate(input);
    }

    /**
     * Escapes the characters in a {@code String} using HTML entities.
     *
     * <p>Supports only the HTML 3.0 entities.</p>
     *
     * @param input the {@code String} to escape, may be null.
     * @return a new escaped {@code String}, {@code null} if null string input.
     */
    public static String escapeHtml3(final String input) {
        return ESCAPE_HTML3.translate(input);
    }

    // HTML and XML
    /**
     * Escapes the characters in a {@code String} using HTML entities.
     *
     * <p>
     * For example:
     * </p>
     * <p>{@code "bread" & "butter"}</p>
     * becomes:
     * <p>
     * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
     * </p>
     *
     * <p>Supports all known HTML 4.0 entities, including funky accents.
     * Note that the commonly used apostrophe escape character (&amp;apos;)
     * is not a legal entity and so is not supported.</p>
     *
     * @param input the {@code String} to escape, may be null.
     * @return a new escaped {@code String}, {@code null} if null string input.
     * @see <a href="https://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a>
     * @see <a href="https://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a>
     * @see <a href="https://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a>
     * @see <a href="https://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a>
     * @see <a href="https://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a>
     */
    public static String escapeHtml4(final String input) {
        return ESCAPE_HTML4.translate(input);
    }

    // Java and JavaScript
    /**
     * Escapes the characters in a {@code String} using Java String rules.
     *
     * <p>
     * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.)
     * </p>
     *
     * <p>
     * So a tab becomes the characters {@code '\\'} and {@code 't'}.
     * </p>
     *
     * <p>
     * The only difference between Java strings and JavaScript strings is that in JavaScript, a single quote and forward-slash (/) are escaped.
     * </p>
     *
     * <p>
     * Example:
     * </p>
     *
     * <pre>
     * input string: He didn't say, "Stop!"
     * output string: He didn't say, \"Stop!\"
     * </pre>
     *
     * @param input String to escape values in, may be null.
     * @return String with escaped values, {@code null} if null string input.
     */
    public static String escapeJava(final String input) {
        return ESCAPE_JAVA.translate(input);
    }

    /**
     * Escapes the characters in a {@code String} using Json String rules.
     *
     * <p>
     * Escapes any values it finds into their Json String form. Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.)
     * </p>
     *
     * <p>
     * So a tab becomes the characters {@code '\\'} and {@code 't'}.
     * </p>
     *
     * <p>
     * The only difference between Java strings and Json strings is that in Json, forward-slash (/) is escaped.
     * </p>
     *
     * <p>
     * See http://www.ietf.org/rfc/rfc4627.txt for further details.
     * </p>
     *
     * <p>
     * Example:
     * </p>
     *
     * <pre>
     * input string: He didn't say, "Stop!"
     * output string: He didn't say, \"Stop!\"
     * </pre>
     *
     * @param input String to escape values in, may be null.
     * @return String with escaped values, {@code null} if null string input.
     */
    public static String escapeJson(final String input) {
        return ESCAPE_JSON.translate(input);
    }

    /**
     * Escapes the characters in a {@code String} using XML entities.
     *
     * <p>
     * For example: {@code "bread" & "butter"} =&gt; {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
     * </p>
     *
     * <p>
     * Note that XML 1.0 is a text-only format: it cannot represent control characters or unpaired Unicode surrogate code points, even after escaping.
     * {@code escapeXml10} will remove characters that do not fit in the following ranges:
     * </p>
     *
     * <p>
     * {@code #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}
     * </p>
     *
     * <p>
     * Though not strictly necessary, {@code escapeXml10} will escape characters in the following ranges:
     * </p>
     *
     * <p>
     * {@code [#x7F-#x84] | [#x86-#x9F]}
     * </p>
     *
     * <p>
     * The returned string can be inserted into a valid XML 1.0 or XML 1.1 document. If you want to allow more non-text characters in an XML 1.1 document, use
     * {@link #escapeXml11(String)}.
     * </p>
     *
     * @param input the {@code String} to escape, may be null.
     * @return a new escaped {@code String}, {@code null} if null string input.
     * @see #unescapeXml(String)
     */
    public static String escapeXml10(final String input) {
        return ESCAPE_XML10.translate(input);
    }

    /**
     * Escapes the characters in a {@code String} using XML entities.
     *
     * <p>
     * For example: {@code "bread" & "butter"} =&gt; {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
     * </p>
     *
     * <p>
     * XML 1.1 can represent certain control characters, but it cannot represent the null byte or unpaired Unicode surrogate code points, even after escaping.
     * {@code escapeXml11} will remove characters that do not fit in the following ranges:
     * </p>
     *
     * <p>
     * {@code [#x1-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}
     * </p>
     *
     * <p>
     * {@code escapeXml11} will escape characters in the following ranges:
     * </p>
     *
     * <p>
     * {@code [#x1-#x8] | [#xB-#xC] | [#xE-#x1F] | [#x7F-#x84] | [#x86-#x9F]}
     * </p>
     *
     * <p>
     * The returned string can be inserted into a valid XML 1.1 document. Do not use it for XML 1.0 documents.
     * </p>
     *
     * @param input the {@code String} to escape, may be null.
     * @return a new escaped {@code String}, {@code null} if null string input.
     * @see #unescapeXml(String)
     */
    public static String escapeXml11(final String input) {
        return ESCAPE_XML11.translate(input);
    }

    /**
     * Escapes the characters in a {@code String} using XSI rules.
     *
     * <p>
     * <strong>Beware!</strong> In most cases you don't want to escape shell commands but use multi-argument methods provided by {@link ProcessBuilder} or
     * {@link Runtime#exec(String[])} instead.
     * </p>
     *
     * <p>
     * Example:
     * </p>
     *
     * <pre>
     * input string: He didn't say, "Stop!"
     * output string: He\ didn\'t\ say,\ \"Stop!\"
     * </pre>
     *
     * @param input String to escape values in, may be null.
     * @return String with escaped values, {@code null} if null string input.
     * @see <a href="https://pubs.opengroup.org/onlinepubs/7908799/xcu/chap2.html">Shell Command Language</a>
     */
    public static String escapeXSI(final String input) {
        return ESCAPE_XSI.translate(input);
    }

    /**
     * Returns a {@code String} value for an unescaped CSV column.
     *
     * <p>
     * If the value is enclosed in double quotes, and contains a comma, newline or double quote, then quotes are removed.
     * </p>
     *
     * <p>
     * Any double quote escaped characters (a pair of double quotes) are unescaped to just one double quote.
     * </p>
     *
     * <p>
     * If the value is not enclosed in double quotes, or is and does not contain a comma, newline or double quote, then the String value is returned unchanged.
     * </p>
     *
     * <p>
     * See <a href="https://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and <a href="https://tools.ietf.org/html/rfc4180">RFC 4180</a>.
     * </p>
     *
     * @param input the input CSV column String, may be null.
     * @return The input String, with enclosing double quotes removed and embedded double quotes unescaped, {@code null} if null string input.
     */
    public static String unescapeCsv(final String input) {
        return UNESCAPE_CSV.translate(input);
    }

    /**
     * Unescapes any EcmaScript literals found in the {@code String}.
     *
     * <p>
     * For example, it will turn a sequence of {@code '\'} and {@code 'n'} into a newline character, unless the {@code '\'} is preceded by another {@code '\'}.
     * </p>
     *
     * @param input the {@code String} to unescape, may be null.
     * @return A new unescaped {@code String}, {@code null} if null string input.
     * @see #unescapeJava(String)
     */
    public static String unescapeEcmaScript(final String input) {
        return UNESCAPE_ECMASCRIPT.translate(input);
    }

    /**
     * Unescapes a string containing entity escapes to a string containing the actual Unicode characters corresponding to the escapes. Supports only HTML 3.0
     * entities.
     *
     * @param input the {@code String} to unescape, may be null.
     * @return a new unescaped {@code String}, {@code null} if null string input.
     */
    public static String unescapeHtml3(final String input) {
        return UNESCAPE_HTML3.translate(input);
    }

    /**
     * Unescapes a string containing entity escapes to a string containing the actual Unicode characters corresponding to the escapes. Supports HTML 4.0
     * entities.
     *
     * <p>
     * For example, the string {@code "&lt;Fran&ccedil;ais&gt;"} will become {@code "<Français>"}
     * </p>
     *
     * <p>
     * If an entity is unrecognized, it is left alone, and inserted verbatim into the result string. e.g. {@code "&gt;&zzzz;x"} will become {@code ">&zzzz;x"}.
     * </p>
     *
     * @param input the {@code String} to unescape, may be null.
     * @return a new unescaped {@code String}, {@code null} if null string input.
     */
    public static String unescapeHtml4(final String input) {
        return UNESCAPE_HTML4.translate(input);
    }

    /**
     * Unescapes any Java literals found in the {@code String}. For example, it will turn a sequence of {@code '\'} and {@code 'n'} into a newline character,
     * unless the {@code '\'} is preceded by another {@code '\'}.
     *
     * @param input the {@code String} to unescape, may be null.
     * @return a new unescaped {@code String}, {@code null} if null string input.
     */
    public static String unescapeJava(final String input) {
        return UNESCAPE_JAVA.translate(input);
    }

    /**
     * Unescapes any Json literals found in the {@code String}.
     *
     * <p>
     * For example, it will turn a sequence of {@code '\'} and {@code 'n'} into a newline character, unless the {@code '\'} is preceded by another {@code '\'}.
     * </p>
     *
     * @param input the {@code String} to unescape, may be null.
     * @return A new unescaped {@code String}, {@code null} if null string input.
     * @see #unescapeJava(String)
     */
    public static String unescapeJson(final String input) {
        return UNESCAPE_JSON.translate(input);
    }

    /**
     * Unescapes a string containing XML entity escapes to a string containing the actual Unicode characters corresponding to the escapes.
     *
     * <p>
     * Supports only the five basic XML entities (gt, lt, quot, amp, apos). Does not support DTDs or external entities.
     * </p>
     *
     * <p>
     * Note that numerical \\u Unicode codes are unescaped to their respective Unicode characters. This may change in future releases.
     * </p>
     *
     * @param input the {@code String} to unescape, may be null.
     * @return a new unescaped {@code String}, {@code null} if null string input.
     * @see #escapeXml10(String)
     * @see #escapeXml11(String)
     */
    public static String unescapeXml(final String input) {
        return UNESCAPE_XML.translate(input);
    }

    /**
     * Unescapes the characters in a {@code String} using XSI rules.
     *
     * @param input the {@code String} to unescape, may be null.
     * @return a new unescaped {@code String}, {@code null} if null string input.
     * @see StringEscapeUtils#escapeXSI(String)
     */
    public static String unescapeXSI(final String input) {
        return UNESCAPE_XSI.translate(input);
    }

    /**
     * {@code StringEscapeUtils} instances should NOT be constructed in standard programming.
     *
     * <p>
     * Instead, the class should be used as:
     * </p>
     *
     * <pre>
     * StringEscapeUtils.escapeJava("foo");
     * </pre>
     *
     * <p>
     * This constructor is public to permit tools that require a JavaBean instance to operate.
     * </p>
     */
    public StringEscapeUtils() {
    }

}