/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.text;

import java.io.IOException;
import java.io.Writer;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

import org.apache.commons.lang3.StringUtils;
import org.apache.commons.text.translate.AggregateTranslator;
import org.apache.commons.text.translate.CharSequenceTranslator;
import org.apache.commons.text.translate.CsvTranslators;
import org.apache.commons.text.translate.EntityArrays;
import org.apache.commons.text.translate.JavaUnicodeEscaper;
import org.apache.commons.text.translate.LookupTranslator;
import org.apache.commons.text.translate.NumericEntityEscaper;
import org.apache.commons.text.translate.NumericEntityUnescaper;
import org.apache.commons.text.translate.OctalUnescaper;
import org.apache.commons.text.translate.UnicodeUnescaper;
import org.apache.commons.text.translate.UnicodeUnpairedSurrogateRemover;

/**
 * <p>
 * Escapes and unescapes {@code String}s for Java, Java Script, HTML and XML.
 * </p>
 *
 * <p>
 * #ThreadSafe#
 * </p>
 *
 * <p>
 * This code has been adapted from Apache Commons Lang 3.5.
 * </p>
 *
 * @since 1.0
 */
public class StringEscapeUtils {

    /**
     * Convenience wrapper for {@link StringBuilder} providing escape methods.
     *
     * <p>Instances are obtained through
     * {@link StringEscapeUtils#builder(CharSequenceTranslator)}; the constructor is private.</p>
     *
     * <p>Example:</p>
     * <pre>
     * StringEscapeUtils.builder(StringEscapeUtils.ESCAPE_HTML4)
     *      .append("&lt;p&gt;")
     *      .escape("This is paragraph 1 and special chars like &amp; get escaped.")
     *      .append("&lt;/p&gt;&lt;p&gt;")
     *      .escape("This is paragraph 2 &amp; more...")
     *      .append("&lt;/p&gt;")
     *      .toString()
     * </pre>
     */
    public static final class Builder {

        /**
         * StringBuilder to be used in the Builder class.
         */
        private final StringBuilder sb;

        /**
         * CharSequenceTranslator to be used in the Builder class.
         */
        private final CharSequenceTranslator translator;

        /**
         * Builder constructor.
         *
         * @param translator a CharSequenceTranslator.
         */
        private Builder(final CharSequenceTranslator translator) {
            this.sb = new StringBuilder();
            this.translator = translator;
        }

        /**
         * Literal append, no escaping being done.
         *
         * @param input the String to append
         * @return {@code this}, to enable chaining
         */
        public Builder append(final String input) {
            sb.append(input);
            return this;
        }

        /**
         * Escape {@code input} according to the given {@link CharSequenceTranslator}.
         *
         * @param input the String to escape
         * @return {@code this}, to enable chaining
         */
        public Builder escape(final String input) {
            sb.append(translator.translate(input));
            return this;
        }

        /**
         * Return the escaped string.
         *
         * @return The escaped string
         */
        @Override
        public String toString() {
            return sb.toString();
        }
    }

    /**
     * Translator object for unescaping backslash escaped entries.
     *
     * <p>Every backslash is dropped and the character that follows it is copied verbatim,
     * so an escaped backslash yields a single backslash. A backslash that is the very last
     * character of the input is dropped as well.</p>
     */
    static class XsiUnescaper extends CharSequenceTranslator {

        /**
         * Escaped backslash constant.
         */
        private static final char BACKSLASH = '\\';

        /**
         * Unescapes the complete input in a single pass.
         *
         * @param input the text to unescape
         * @param index the position at which translation starts; must be {@code 0}
         * @param writer the destination for the unescaped text
         * @return the number of code points in {@code input}, i.e. the whole input is reported as consumed
         * @throws IOException if writing to {@code writer} fails
         * @throws IllegalStateException if {@code index} is not {@code 0}
         */
        @Override
        public int translate(final CharSequence input, final int index, final Writer writer) throws IOException {

            // The whole input is handled on the first call, so any later index is a caller error.
            if (index != 0) {
                throw new IllegalStateException("XsiUnescaper should never reach the [1] index");
            }

            final String s = input.toString();

            int segmentStart = 0;
            int searchOffset = 0;
            while (true) {
                final int pos = s.indexOf(BACKSLASH, searchOffset);
                if (pos == -1) {
                    // No further backslash: flush whatever is left.
                    if (segmentStart < s.length()) {
                        writer.write(s.substring(segmentStart));
                    }
                    break;
                }
                if (pos > segmentStart) {
                    writer.write(s.substring(segmentStart, pos));
                }
                // The next segment begins right after the backslash, which drops the backslash itself.
                segmentStart = pos + 1;
                // Skip the escaped character when searching, so an escaped backslash is kept as a literal.
                searchOffset = pos + 2;
            }

            return Character.codePointCount(input, 0, input.length());
        }
    }

    /* ESCAPE TRANSLATORS */

    /**
     * Translator object for escaping Java.
     *
     * While {@link #escapeJava(String)} is the expected method of use, this
     * object allows the Java escaping functionality to be used
     * as the foundation for a custom translator.
     */
    public static final CharSequenceTranslator ESCAPE_JAVA;
    static {
        final Map<CharSequence, CharSequence> escapeJavaMap = new HashMap<>();
        escapeJavaMap.put("\"", "\\\"");
        escapeJavaMap.put("\\", "\\\\");
        ESCAPE_JAVA = new AggregateTranslator(
                new LookupTranslator(Collections.unmodifiableMap(escapeJavaMap)),
                new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE),
                JavaUnicodeEscaper.outsideOf(32, 0x7f)
        );
    }

    /**
     * Translator object for escaping EcmaScript/JavaScript.
     *
     * While {@link #escapeEcmaScript(String)} is the expected method of use, this
     * object allows the EcmaScript escaping functionality to be used
     * as the foundation for a custom translator.
     */
    public static final CharSequenceTranslator ESCAPE_ECMASCRIPT;
    static {
        final Map<CharSequence, CharSequence> escapeEcmaScriptMap = new HashMap<>();
        escapeEcmaScriptMap.put("'", "\\'");
        escapeEcmaScriptMap.put("\"", "\\\"");
        escapeEcmaScriptMap.put("\\", "\\\\");
        escapeEcmaScriptMap.put("/", "\\/");
        ESCAPE_ECMASCRIPT = new AggregateTranslator(
                new LookupTranslator(Collections.unmodifiableMap(escapeEcmaScriptMap)),
                new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE),
                JavaUnicodeEscaper.outsideOf(32, 0x7f)
        );
    }

    /**
     * Translator object for escaping Json.
     *
     * While {@link #escapeJson(String)} is the expected method of use, this
     * object allows the Json escaping functionality to be used
     * as the foundation for a custom translator.
     */
    public static final CharSequenceTranslator ESCAPE_JSON;
    static {
        final Map<CharSequence, CharSequence> escapeJsonMap = new HashMap<>();
        escapeJsonMap.put("\"", "\\\"");
        escapeJsonMap.put("\\", "\\\\");
        escapeJsonMap.put("/", "\\/");
        ESCAPE_JSON = new AggregateTranslator(
                new LookupTranslator(Collections.unmodifiableMap(escapeJsonMap)),
                new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE),
                JavaUnicodeEscaper.outsideOf(32, 0x7e)
        );
    }

    /**
     * Translator object for escaping XML 1.0.
     *
     * While {@link #escapeXml10(String)} is the expected method of use, this
     * object allows the XML escaping functionality to be used
     * as the foundation for a custom translator.
     */
    public static final CharSequenceTranslator ESCAPE_XML10;
    static {
        // Characters mapped to the empty string are removed from the output.
        final Map<CharSequence, CharSequence> escapeXml10Map = new HashMap<>();
        escapeXml10Map.put("\u0000", StringUtils.EMPTY);
        escapeXml10Map.put("\u0001", StringUtils.EMPTY);
        escapeXml10Map.put("\u0002", StringUtils.EMPTY);
        escapeXml10Map.put("\u0003", StringUtils.EMPTY);
        escapeXml10Map.put("\u0004", StringUtils.EMPTY);
        escapeXml10Map.put("\u0005", StringUtils.EMPTY);
        escapeXml10Map.put("\u0006", StringUtils.EMPTY);
        escapeXml10Map.put("\u0007", StringUtils.EMPTY);
        escapeXml10Map.put("\u0008", StringUtils.EMPTY);
        escapeXml10Map.put("\u000b", StringUtils.EMPTY);
        escapeXml10Map.put("\u000c", StringUtils.EMPTY);
        escapeXml10Map.put("\u000e", StringUtils.EMPTY);
        escapeXml10Map.put("\u000f", StringUtils.EMPTY);
        escapeXml10Map.put("\u0010", StringUtils.EMPTY);
        escapeXml10Map.put("\u0011", StringUtils.EMPTY);
        escapeXml10Map.put("\u0012", StringUtils.EMPTY);
        escapeXml10Map.put("\u0013", StringUtils.EMPTY);
        escapeXml10Map.put("\u0014", StringUtils.EMPTY);
        escapeXml10Map.put("\u0015", StringUtils.EMPTY);
        escapeXml10Map.put("\u0016", StringUtils.EMPTY);
        escapeXml10Map.put("\u0017", StringUtils.EMPTY);
        escapeXml10Map.put("\u0018", StringUtils.EMPTY);
        escapeXml10Map.put("\u0019", StringUtils.EMPTY);
        escapeXml10Map.put("\u001a", StringUtils.EMPTY);
        escapeXml10Map.put("\u001b", StringUtils.EMPTY);
        escapeXml10Map.put("\u001c", StringUtils.EMPTY);
        escapeXml10Map.put("\u001d", StringUtils.EMPTY);
        escapeXml10Map.put("\u001e", StringUtils.EMPTY);
        escapeXml10Map.put("\u001f", StringUtils.EMPTY);
        escapeXml10Map.put("\ufffe", StringUtils.EMPTY);
        escapeXml10Map.put("\uffff", StringUtils.EMPTY);
        ESCAPE_XML10 = new AggregateTranslator(
                new LookupTranslator(EntityArrays.BASIC_ESCAPE),
                new LookupTranslator(EntityArrays.APOS_ESCAPE),
                new LookupTranslator(Collections.unmodifiableMap(escapeXml10Map)),
                NumericEntityEscaper.between(0x7f, 0x84),
                NumericEntityEscaper.between(0x86, 0x9f),
                new UnicodeUnpairedSurrogateRemover()
        );
    }

    /**
     * Translator object for escaping XML 1.1.
     *
     * While {@link #escapeXml11(String)} is the expected method of use, this
     * object allows the XML escaping functionality to be used
     * as the foundation for a custom translator.
     */
    public static final CharSequenceTranslator ESCAPE_XML11;

    static {
        final Map<CharSequence, CharSequence> escapeXml11Map = new HashMap<>();
        escapeXml11Map.put("\u0000", StringUtils.EMPTY);
        // Vertical tab and form feed are escaped as numeric entities rather than removed:
        // escapeXml11 documents [#xB-#xC] as escaped, and the NumericEntityEscaper ranges
        // below (0x1-0x8 and 0xe-0x1f) do not cover these two code points.
        escapeXml11Map.put("\u000b", "&#11;");
        escapeXml11Map.put("\u000c", "&#12;");
        escapeXml11Map.put("\ufffe", StringUtils.EMPTY);
        escapeXml11Map.put("\uffff", StringUtils.EMPTY);
        ESCAPE_XML11 = new AggregateTranslator(
                new LookupTranslator(EntityArrays.BASIC_ESCAPE),
                new LookupTranslator(EntityArrays.APOS_ESCAPE),
                new LookupTranslator(Collections.unmodifiableMap(escapeXml11Map)),
                NumericEntityEscaper.between(0x1, 0x8),
                NumericEntityEscaper.between(0xe, 0x1f),
                NumericEntityEscaper.between(0x7f, 0x84),
                NumericEntityEscaper.between(0x86, 0x9f),
                new UnicodeUnpairedSurrogateRemover()
        );
    }

    /**
     * Translator object for escaping HTML version 3.0.
     *
     * While {@link #escapeHtml3(String)} is the expected method of use, this
     * object allows the HTML escaping functionality to be used
     * as the foundation for a custom translator.
     */
    public static final CharSequenceTranslator ESCAPE_HTML3 =
            new AggregateTranslator(
                    new LookupTranslator(EntityArrays.BASIC_ESCAPE),
                    new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE)
            );

    /**
     * Translator object for escaping HTML version 4.0.
     *
     * While {@link #escapeHtml4(String)} is the expected method of use, this
     * object allows the HTML escaping functionality to be used
     * as the foundation for a custom translator.
     */
    public static final CharSequenceTranslator ESCAPE_HTML4 =
            new AggregateTranslator(
                    new LookupTranslator(EntityArrays.BASIC_ESCAPE),
                    new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE),
                    new LookupTranslator(EntityArrays.HTML40_EXTENDED_ESCAPE)
            );

    /**
     * Translator object for escaping individual Comma Separated Values.
     *
     * While {@link #escapeCsv(String)} is the expected method of use, this
     * object allows the CSV escaping functionality to be used
     * as the foundation for a custom translator.
     */
    public static final CharSequenceTranslator ESCAPE_CSV = new CsvTranslators.CsvEscaper();

    /**
     * Translator object for escaping Shell command language.
     *
     * @see <a href="https://pubs.opengroup.org/onlinepubs/7908799/xcu/chap2.html">Shell Command Language</a>
     */
    public static final CharSequenceTranslator ESCAPE_XSI;
    static {
        final Map<CharSequence, CharSequence> escapeXsiMap = new HashMap<>();
        escapeXsiMap.put("|", "\\|");
        escapeXsiMap.put("&", "\\&");
        escapeXsiMap.put(";", "\\;");
        escapeXsiMap.put("<", "\\<");
        escapeXsiMap.put(">", "\\>");
        escapeXsiMap.put("(", "\\(");
        escapeXsiMap.put(")", "\\)");
        escapeXsiMap.put("$", "\\$");
        escapeXsiMap.put("`", "\\`");
        escapeXsiMap.put("\\", "\\\\");
        escapeXsiMap.put("\"", "\\\"");
        escapeXsiMap.put("'", "\\'");
        escapeXsiMap.put(" ", "\\ ");
        escapeXsiMap.put("\t", "\\\t");
        // Line breaks are mapped to the empty string, i.e. removed rather than escaped.
        escapeXsiMap.put("\r\n", StringUtils.EMPTY);
        escapeXsiMap.put("\n", StringUtils.EMPTY);
        escapeXsiMap.put("*", "\\*");
        escapeXsiMap.put("?", "\\?");
        escapeXsiMap.put("[", "\\[");
        escapeXsiMap.put("#", "\\#");
        escapeXsiMap.put("~", "\\~");
        escapeXsiMap.put("=", "\\=");
        escapeXsiMap.put("%", "\\%");
        ESCAPE_XSI = new LookupTranslator(
                Collections.unmodifiableMap(escapeXsiMap)
        );
    }

    /* UNESCAPE TRANSLATORS */

    /**
     * Translator object for unescaping escaped Java.
     *
     * While {@link #unescapeJava(String)} is the expected method of use, this
     * object allows the Java unescaping functionality to be used
     * as the foundation for a custom translator.
     */
    public static final CharSequenceTranslator UNESCAPE_JAVA;

    static {
        final Map<CharSequence, CharSequence> unescapeJavaMap = new HashMap<>();
        unescapeJavaMap.put("\\\\", "\\");
        unescapeJavaMap.put("\\\"", "\"");
        unescapeJavaMap.put("\\'", "'");
        // A lone backslash maps to the empty string.
        unescapeJavaMap.put("\\", StringUtils.EMPTY);
        UNESCAPE_JAVA = new AggregateTranslator(
                new OctalUnescaper(),     // .between('\1', '\377'),
                new UnicodeUnescaper(),
                new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_UNESCAPE),
                new LookupTranslator(Collections.unmodifiableMap(unescapeJavaMap))
        );
    }

    /**
     * Translator object for unescaping escaped EcmaScript.
     *
     * While {@link #unescapeEcmaScript(String)} is the expected method of use, this
     * object allows the EcmaScript unescaping functionality to be used
     * as the foundation for a custom translator.
     */
    public static final CharSequenceTranslator UNESCAPE_ECMASCRIPT = UNESCAPE_JAVA;

    /**
     * Translator object for unescaping escaped Json.
     *
     * While {@link #unescapeJson(String)} is the expected method of use, this
     * object allows the Json unescaping functionality to be used
     * as the foundation for a custom translator.
     */
    public static final CharSequenceTranslator UNESCAPE_JSON = UNESCAPE_JAVA;

    /**
     * Translator object for unescaping escaped HTML 3.0.
     *
     * While {@link #unescapeHtml3(String)} is the expected method of use, this
     * object allows the HTML unescaping functionality to be used
     * as the foundation for a custom translator.
     */
    public static final CharSequenceTranslator UNESCAPE_HTML3 =
            new AggregateTranslator(
                    new LookupTranslator(EntityArrays.BASIC_UNESCAPE),
                    new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE),
                    new NumericEntityUnescaper()
            );

    /**
     * Translator object for unescaping escaped HTML 4.0.
     *
     * While {@link #unescapeHtml4(String)} is the expected method of use, this
     * object allows the HTML unescaping functionality to be used
     * as the foundation for a custom translator.
     */
    public static final CharSequenceTranslator UNESCAPE_HTML4 =
            new AggregateTranslator(
                    new LookupTranslator(EntityArrays.BASIC_UNESCAPE),
                    new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE),
                    new LookupTranslator(EntityArrays.HTML40_EXTENDED_UNESCAPE),
                    new NumericEntityUnescaper()
            );

    /**
     * Translator object for unescaping escaped XML.
     *
     * While {@link #unescapeXml(String)} is the expected method of use, this
     * object allows the XML unescaping functionality to be used
     * as the foundation for a custom translator.
     */
    public static final CharSequenceTranslator UNESCAPE_XML =
            new AggregateTranslator(
                    new LookupTranslator(EntityArrays.BASIC_UNESCAPE),
                    new LookupTranslator(EntityArrays.APOS_UNESCAPE),
                    new NumericEntityUnescaper()
            );

    /**
     * Translator object for unescaping escaped Comma Separated Value entries.
     *
     * While {@link #unescapeCsv(String)} is the expected method of use, this
     * object allows the CSV unescaping functionality to be used
     * as the foundation for a custom translator.
     */
    public static final CharSequenceTranslator UNESCAPE_CSV = new CsvTranslators.CsvUnescaper();

    /**
     * Translator object for unescaping escaped XSI Value entries.
     *
     * While {@link #unescapeXSI(String)} is the expected method of use, this
     * object allows the XSI unescaping functionality to be used
     * as the foundation for a custom translator.
     */
    public static final CharSequenceTranslator UNESCAPE_XSI = new XsiUnescaper();

    /* Helper functions */

    /**
     * Gets a {@link Builder}.
     * @param translator the text translator
     * @return {@link Builder}
     */
    public static StringEscapeUtils.Builder builder(final CharSequenceTranslator translator) {
        return new Builder(translator);
    }

    /**
     * Returns a {@code String} value for a CSV column enclosed in double quotes,
     * if required.
     *
     * <p>If the value contains a comma, newline or double quote, then the
     *    String value is returned enclosed in double quotes.</p>
     *
     * <p>Any double quote characters in the value are escaped with another double quote.</p>
     *
     * <p>If the value does not contain a comma, newline or double quote, then the
     *    String value is returned unchanged.</p>
     *
     * see <a href="https://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
     * <a href="https://tools.ietf.org/html/rfc4180">RFC 4180</a>.
     *
     * @param input the input CSV column String, may be null
     * @return The input String, enclosed in double quotes if the value contains a comma,
     * newline or double quote, {@code null} if null string input
     */
    public static final String escapeCsv(final String input) {
        return ESCAPE_CSV.translate(input);
    }

    /**
     * Escapes the characters in a {@code String} using EcmaScript String rules.
     *
     * <p>Escapes any values it finds into their EcmaScript String form.
     * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
     *
     * <p>So a tab becomes the characters {@code '\\'} and
     * {@code 't'}.</p>
     *
     * <p>The only difference between Java strings and EcmaScript strings
     * is that in EcmaScript, a single quote and forward-slash (/) are escaped.</p>
     *
     * <p>Note that EcmaScript is best known by the JavaScript and ActionScript dialects.</p>
     *
     * <p>Example:</p>
     * <pre>
     * input string: He didn't say, "Stop!"
     * output string: He didn\'t say, \"Stop!\"
     * </pre>
     *
     * <b>Security Note.</b> We only provide backslash escaping in this method. For example, {@code '\"'} has the output
     * {@code '\\\"'} which could result in potential issues in the case where the string being escaped is being used
     * in an HTML tag like {@code <select onmouseover="..." />}. If you wish to have more rigorous string escaping, you
     * may consider the
     * <a href="https://www.owasp.org/index.php/Category:OWASP_Enterprise_Security_API_JAVA">ESAPI Libraries</a>.
     * Further, you can view the <a href="https://github.com/esapi">ESAPI GitHub Org</a>.
     *
     * @param input  String to escape values in, may be null
     * @return String with escaped values, {@code null} if null string input
     */
    public static final String escapeEcmaScript(final String input) {
        return ESCAPE_ECMASCRIPT.translate(input);
    }

    /**
     * Escapes the characters in a {@code String} using HTML entities.
     *
     * <p>Supports only the HTML 3.0 entities.</p>
     *
     * @param input  the {@code String} to escape, may be null
     * @return a new escaped {@code String}, {@code null} if null string input
     */
    public static final String escapeHtml3(final String input) {
        return ESCAPE_HTML3.translate(input);
    }

    // HTML and XML
    /**
     * Escapes the characters in a {@code String} using HTML entities.
     *
     * <p>
     * For example:
     * </p>
     * <p>{@code "bread" & "butter"}</p>
     * becomes:
     * <p>
     * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
     * </p>
     *
     * <p>Supports all known HTML 4.0 entities, including funky accents.
     * Note that the commonly used apostrophe escape character (&amp;apos;)
     * is not a legal entity and so is not supported.</p>
     *
     * @param input  the {@code String} to escape, may be null
     * @return a new escaped {@code String}, {@code null} if null string input
     *
     * @see <a href="https://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a>
     * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a>
     * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a>
     * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a>
     * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a>
     */
    public static final String escapeHtml4(final String input) {
        return ESCAPE_HTML4.translate(input);
    }

    // Java and JavaScript
    /**
     * Escapes the characters in a {@code String} using Java String rules.
     *
     * <p>Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
     *
     * <p>So a tab becomes the characters {@code '\\'} and
     * {@code 't'}.</p>
     *
     * <p>The only difference between Java strings and JavaScript strings
     * is that in JavaScript, a single quote and forward-slash (/) are escaped.</p>
     *
     * <p>Example:</p>
     * <pre>
     * input string: He didn't say, "Stop!"
     * output string: He didn't say, \"Stop!\"
     * </pre>
     *
     * @param input  String to escape values in, may be null
     * @return String with escaped values, {@code null} if null string input
     */
    public static final String escapeJava(final String input) {
        return ESCAPE_JAVA.translate(input);
    }

    /**
     * Escapes the characters in a {@code String} using Json String rules.
     *
     * <p>Escapes any values it finds into their Json String form.
     * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
     *
     * <p>So a tab becomes the characters {@code '\\'} and
     * {@code 't'}.</p>
     *
     * <p>The only difference between Java strings and Json strings
     * is that in Json, forward-slash (/) is escaped.</p>
     *
     * <p>See http://www.ietf.org/rfc/rfc4627.txt for further details.</p>
     *
     * <p>Example:</p>
     * <pre>
     * input string: He didn't say, "Stop!"
     * output string: He didn't say, \"Stop!\"
     * </pre>
     *
     * @param input  String to escape values in, may be null
     * @return String with escaped values, {@code null} if null string input
     */
    public static final String escapeJson(final String input) {
        return ESCAPE_JSON.translate(input);
    }

    /**
     * Escapes the characters in a {@code String} using XML entities.
     *
     * <p>For example: {@code "bread" & "butter"} =&gt;
     * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
     * </p>
     *
     * <p>Note that XML 1.0 is a text-only format: it cannot represent control
     * characters or unpaired Unicode surrogate code points, even after escaping.
     * {@code escapeXml10} will remove characters that do not fit in the
     * following ranges:</p>
     *
     * <p>{@code #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p>
     *
     * <p>Though not strictly necessary, {@code escapeXml10} will escape
     * characters in the following ranges:</p>
     *
     * <p>{@code [#x7F-#x84] | [#x86-#x9F]}</p>
     *
     * <p>The returned string can be inserted into a valid XML 1.0 or XML 1.1
     * document. If you want to allow more non-text characters in an XML 1.1
     * document, use {@link #escapeXml11(String)}.</p>
     *
     * @param input  the {@code String} to escape, may be null
     * @return a new escaped {@code String}, {@code null} if null string input
     * @see #unescapeXml(String)
     */
    public static String escapeXml10(final String input) {
        return ESCAPE_XML10.translate(input);
    }

    /**
     * Escapes the characters in a {@code String} using XML entities.
     *
     * <p>For example: {@code "bread" & "butter"} =&gt;
     * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
     * </p>
     *
     * <p>XML 1.1 can represent certain control characters, but it cannot represent
     * the null byte or unpaired Unicode surrogate code points, even after escaping.
     * {@code escapeXml11} will remove characters that do not fit in the following
     * ranges:</p>
     *
     * <p>{@code [#x1-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p>
     *
     * <p>{@code escapeXml11} will escape characters in the following ranges:</p>
     *
     * <p>{@code [#x1-#x8] | [#xB-#xC] | [#xE-#x1F] | [#x7F-#x84] | [#x86-#x9F]}</p>
     *
     * <p>The returned string can be inserted into a valid XML 1.1 document. Do not
     * use it for XML 1.0 documents.</p>
     *
     * @param input  the {@code String} to escape, may be null
     * @return a new escaped {@code String}, {@code null} if null string input
     * @see #unescapeXml(String)
     */
    public static String escapeXml11(final String input) {
        return ESCAPE_XML11.translate(input);
    }

    /**
     * Escapes the characters in a {@code String} using XSI rules.
     *
     * <p><b>Beware!</b> In most cases you don't want to escape shell commands but use multi-argument
     * methods provided by {@link ProcessBuilder} or {@link Runtime#exec(String[])}
     * instead.</p>
     *
     * <p>Example:</p>
     * <pre>
     * input string: He didn't say, "Stop!"
     * output string: He\ didn\'t\ say,\ \"Stop!\"
     * </pre>
     *
     * @see <a href="https://pubs.opengroup.org/onlinepubs/7908799/xcu/chap2.html">Shell Command Language</a>
     * @param input  String to escape values in, may be null
     * @return String with escaped values, {@code null} if null string input
     */
    public static final String escapeXSI(final String input) {
        return ESCAPE_XSI.translate(input);
    }

    /**
     * Returns a {@code String} value for an unescaped CSV column.
     *
     * <p>If the value is enclosed in double quotes, and contains a comma, newline
     *    or double quote, then quotes are removed.
     * </p>
     *
     * <p>Any double quote escaped characters (a pair of double quotes) are unescaped
     *    to just one double quote.</p>
     *
     * <p>If the value is not enclosed in double quotes, or is and does not contain a
     *    comma, newline or double quote, then the String value is returned unchanged.</p>
     *
     * see <a href="https://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
     * <a href="https://tools.ietf.org/html/rfc4180">RFC 4180</a>.
     *
     * @param input the input CSV column String, may be null
     * @return The input String, with enclosing double quotes removed and embedded double
     * quotes unescaped, {@code null} if null string input
     */
    public static final String unescapeCsv(final String input) {
        return UNESCAPE_CSV.translate(input);
    }

    /**
     * Unescapes any EcmaScript literals found in the {@code String}.
     *
     * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'}
     * into a newline character, unless the {@code '\'} is preceded by another
     * {@code '\'}.</p>
     *
     * @see #unescapeJava(String)
     * @param input  the {@code String} to unescape, may be null
     * @return A new unescaped {@code String}, {@code null} if null string input
     */
    public static final String unescapeEcmaScript(final String input) {
        return UNESCAPE_ECMASCRIPT.translate(input);
    }

    /**
     * Unescapes a string containing entity escapes to a string
     * containing the actual Unicode characters corresponding to the
     * escapes. Supports only HTML 3.0 entities.
     *
     * @param input  the {@code String} to unescape, may be null
     * @return a new unescaped {@code String}, {@code null} if null string input
     */
    public static final String unescapeHtml3(final String input) {
        return UNESCAPE_HTML3.translate(input);
    }

    /**
     * Unescapes a string containing entity escapes to a string
     * containing the actual Unicode characters corresponding to the
     * escapes. Supports HTML 4.0 entities.
     *
     * <p>For example, the string {@code "&lt;Fran&ccedil;ais&gt;"}
     * will become {@code "<Français>"}</p>
     *
     * <p>If an entity is unrecognized, it is left alone, and inserted
     * verbatim into the result string. e.g. {@code "&gt;&zzzz;x"} will
     * become {@code ">&zzzz;x"}.</p>
     *
     * @param input  the {@code String} to unescape, may be null
     * @return a new unescaped {@code String}, {@code null} if null string input
     */
    public static final String unescapeHtml4(final String input) {
        return UNESCAPE_HTML4.translate(input);
    }

    /**
     * Unescapes any Java literals found in the {@code String}.
     * For example, it will turn a sequence of {@code '\'} and
     * {@code 'n'} into a newline character, unless the {@code '\'}
     * is preceded by another {@code '\'}.
     *
     * @param input  the {@code String} to unescape, may be null
     * @return a new unescaped {@code String}, {@code null} if null string input
     */
    public static final String unescapeJava(final String input) {
        return UNESCAPE_JAVA.translate(input);
    }

    /**
     * Unescapes any Json literals found in the {@code String}.
     *
     * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'}
     * into a newline character, unless the {@code '\'} is preceded by another
     * {@code '\'}.</p>
     *
     * @see #unescapeJava(String)
     * @param input  the {@code String} to unescape, may be null
     * @return A new unescaped {@code String}, {@code null} if null string input
     */
    public static final String unescapeJson(final String input) {
        return UNESCAPE_JSON.translate(input);
    }

    /**
     * Unescapes a string containing XML entity escapes to a string
     * containing the actual Unicode characters corresponding to the
     * escapes.
     *
     * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
     * Does not support DTDs or external entities.</p>
     *
     * <p>Note that numerical \\u Unicode codes are unescaped to their respective
     *    Unicode characters. This may change in future releases.</p>
     *
     * @param input  the {@code String} to unescape, may be null
     * @return a new unescaped {@code String}, {@code null} if null string input
     * @see #escapeXml10(String)
     * @see #escapeXml11(String)
     */
    public static final String unescapeXml(final String input) {
        return UNESCAPE_XML.translate(input);
    }

    /**
     * Unescapes the characters in a {@code String} using XSI rules.
     *
     * @see StringEscapeUtils#escapeXSI(String)
     * @param input  the {@code String} to unescape, may be null
     * @return a new unescaped {@code String}, {@code null} if null string input
     */
    public static final String unescapeXSI(final String input) {
        return UNESCAPE_XSI.translate(input);
    }

    /**
     * {@code StringEscapeUtils} instances should NOT be constructed in
     * standard programming.
     *
     * <p>Instead, the class should be used as:</p>
     * <pre>StringEscapeUtils.escapeJava("foo");</pre>
     *
     * <p>This constructor is public to permit tools that require a JavaBean
     * instance to operate.</p>
     */
    public StringEscapeUtils() {
    }

}