001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.text; 018 019import java.io.IOException; 020import java.io.Writer; 021import java.util.Collections; 022import java.util.HashMap; 023import java.util.Map; 024 025import org.apache.commons.lang3.StringUtils; 026import org.apache.commons.text.translate.AggregateTranslator; 027import org.apache.commons.text.translate.CharSequenceTranslator; 028import org.apache.commons.text.translate.CsvTranslators; 029import org.apache.commons.text.translate.EntityArrays; 030import org.apache.commons.text.translate.JavaUnicodeEscaper; 031import org.apache.commons.text.translate.LookupTranslator; 032import org.apache.commons.text.translate.NumericEntityEscaper; 033import org.apache.commons.text.translate.NumericEntityUnescaper; 034import org.apache.commons.text.translate.OctalUnescaper; 035import org.apache.commons.text.translate.UnicodeUnescaper; 036import org.apache.commons.text.translate.UnicodeUnpairedSurrogateRemover; 037 038/** 039 * <p> 040 * Escapes and unescapes {@code String}s for Java, Java Script, HTML and XML. 041 * </p> 042 * 043 * <p> 044 * #ThreadSafe# 045 * </p> 046 * 047 * <p> 048 * This code has been adapted from Apache Commons Lang 3.5. 049 * </p> 050 * 051 * @since 1.0 052 */ 053public class StringEscapeUtils { 054 055 /* ESCAPE TRANSLATORS */ 056 057 /** 058 * Translator object for escaping Java. 059 * 060 * While {@link #escapeJava(String)} is the expected method of use, this 061 * object allows the Java escaping functionality to be used 062 * as the foundation for a custom translator. 063 */ 064 public static final CharSequenceTranslator ESCAPE_JAVA; 065 static { 066 final Map<CharSequence, CharSequence> escapeJavaMap = new HashMap<>(); 067 escapeJavaMap.put("\"", "\\\""); 068 escapeJavaMap.put("\\", "\\\\"); 069 ESCAPE_JAVA = new AggregateTranslator( 070 new LookupTranslator(Collections.unmodifiableMap(escapeJavaMap)), 071 new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE), 072 JavaUnicodeEscaper.outsideOf(32, 0x7f) 073 ); 074 } 075 076 /** 077 * Translator object for escaping EcmaScript/JavaScript. 078 * 079 * While {@link #escapeEcmaScript(String)} is the expected method of use, this 080 * object allows the EcmaScript escaping functionality to be used 081 * as the foundation for a custom translator. 082 */ 083 public static final CharSequenceTranslator ESCAPE_ECMASCRIPT; 084 static { 085 final Map<CharSequence, CharSequence> escapeEcmaScriptMap = new HashMap<>(); 086 escapeEcmaScriptMap.put("'", "\\'"); 087 escapeEcmaScriptMap.put("\"", "\\\""); 088 escapeEcmaScriptMap.put("\\", "\\\\"); 089 escapeEcmaScriptMap.put("/", "\\/"); 090 ESCAPE_ECMASCRIPT = new AggregateTranslator( 091 new LookupTranslator(Collections.unmodifiableMap(escapeEcmaScriptMap)), 092 new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE), 093 JavaUnicodeEscaper.outsideOf(32, 0x7f) 094 ); 095 } 096 097 /** 098 * Translator object for escaping Json. 099 * 100 * While {@link #escapeJson(String)} is the expected method of use, this 101 * object allows the Json escaping functionality to be used 102 * as the foundation for a custom translator. 103 */ 104 public static final CharSequenceTranslator ESCAPE_JSON; 105 static { 106 final Map<CharSequence, CharSequence> escapeJsonMap = new HashMap<>(); 107 escapeJsonMap.put("\"", "\\\""); 108 escapeJsonMap.put("\\", "\\\\"); 109 escapeJsonMap.put("/", "\\/"); 110 ESCAPE_JSON = new AggregateTranslator( 111 new LookupTranslator(Collections.unmodifiableMap(escapeJsonMap)), 112 new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE), 113 JavaUnicodeEscaper.outsideOf(32, 0x7e) 114 ); 115 } 116 117 /** 118 * Translator object for escaping XML 1.0. 119 * 120 * While {@link #escapeXml10(String)} is the expected method of use, this 121 * object allows the XML escaping functionality to be used 122 * as the foundation for a custom translator. 123 */ 124 public static final CharSequenceTranslator ESCAPE_XML10; 125 static { 126 final Map<CharSequence, CharSequence> escapeXml10Map = new HashMap<>(); 127 escapeXml10Map.put("\u0000", StringUtils.EMPTY); 128 escapeXml10Map.put("\u0001", StringUtils.EMPTY); 129 escapeXml10Map.put("\u0002", StringUtils.EMPTY); 130 escapeXml10Map.put("\u0003", StringUtils.EMPTY); 131 escapeXml10Map.put("\u0004", StringUtils.EMPTY); 132 escapeXml10Map.put("\u0005", StringUtils.EMPTY); 133 escapeXml10Map.put("\u0006", StringUtils.EMPTY); 134 escapeXml10Map.put("\u0007", StringUtils.EMPTY); 135 escapeXml10Map.put("\u0008", StringUtils.EMPTY); 136 escapeXml10Map.put("\u000b", StringUtils.EMPTY); 137 escapeXml10Map.put("\u000c", StringUtils.EMPTY); 138 escapeXml10Map.put("\u000e", StringUtils.EMPTY); 139 escapeXml10Map.put("\u000f", StringUtils.EMPTY); 140 escapeXml10Map.put("\u0010", StringUtils.EMPTY); 141 escapeXml10Map.put("\u0011", StringUtils.EMPTY); 142 escapeXml10Map.put("\u0012", StringUtils.EMPTY); 143 escapeXml10Map.put("\u0013", StringUtils.EMPTY); 144 escapeXml10Map.put("\u0014", StringUtils.EMPTY); 145 escapeXml10Map.put("\u0015", StringUtils.EMPTY); 146 escapeXml10Map.put("\u0016", StringUtils.EMPTY); 147 escapeXml10Map.put("\u0017", StringUtils.EMPTY); 148 escapeXml10Map.put("\u0018", StringUtils.EMPTY); 149 escapeXml10Map.put("\u0019", StringUtils.EMPTY); 150 escapeXml10Map.put("\u001a", StringUtils.EMPTY); 151 escapeXml10Map.put("\u001b", StringUtils.EMPTY); 152 escapeXml10Map.put("\u001c", StringUtils.EMPTY); 153 escapeXml10Map.put("\u001d", StringUtils.EMPTY); 154 escapeXml10Map.put("\u001e", StringUtils.EMPTY); 155 escapeXml10Map.put("\u001f", StringUtils.EMPTY); 156 escapeXml10Map.put("\ufffe", StringUtils.EMPTY); 157 escapeXml10Map.put("\uffff", StringUtils.EMPTY); 158 ESCAPE_XML10 = new AggregateTranslator( 159 new LookupTranslator(EntityArrays.BASIC_ESCAPE), 160 new LookupTranslator(EntityArrays.APOS_ESCAPE), 161 new LookupTranslator(Collections.unmodifiableMap(escapeXml10Map)), 162 NumericEntityEscaper.between(0x7f, 0x84), 163 NumericEntityEscaper.between(0x86, 0x9f), 164 new UnicodeUnpairedSurrogateRemover() 165 ); 166 } 167 168 /** 169 * Translator object for escaping XML 1.1. 170 * 171 * While {@link #escapeXml11(String)} is the expected method of use, this 172 * object allows the XML escaping functionality to be used 173 * as the foundation for a custom translator. 174 */ 175 public static final CharSequenceTranslator ESCAPE_XML11; 176 static { 177 final Map<CharSequence, CharSequence> escapeXml11Map = new HashMap<>(); 178 escapeXml11Map.put("\u0000", StringUtils.EMPTY); 179 escapeXml11Map.put("\u000b", ""); 180 escapeXml11Map.put("\u000c", ""); 181 escapeXml11Map.put("\ufffe", StringUtils.EMPTY); 182 escapeXml11Map.put("\uffff", StringUtils.EMPTY); 183 ESCAPE_XML11 = new AggregateTranslator( 184 new LookupTranslator(EntityArrays.BASIC_ESCAPE), 185 new LookupTranslator(EntityArrays.APOS_ESCAPE), 186 new LookupTranslator(Collections.unmodifiableMap(escapeXml11Map)), 187 NumericEntityEscaper.between(0x1, 0x8), 188 NumericEntityEscaper.between(0xe, 0x1f), 189 NumericEntityEscaper.between(0x7f, 0x84), 190 NumericEntityEscaper.between(0x86, 0x9f), 191 new UnicodeUnpairedSurrogateRemover() 192 ); 193 } 194 195 /** 196 * Translator object for escaping HTML version 3.0. 197 * 198 * While {@link #escapeHtml3(String)} is the expected method of use, this 199 * object allows the HTML escaping functionality to be used 200 * as the foundation for a custom translator. 201 */ 202 public static final CharSequenceTranslator ESCAPE_HTML3 = 203 new AggregateTranslator( 204 new LookupTranslator(EntityArrays.BASIC_ESCAPE), 205 new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE) 206 ); 207 208 /** 209 * Translator object for escaping HTML version 4.0. 210 * 211 * While {@link #escapeHtml4(String)} is the expected method of use, this 212 * object allows the HTML escaping functionality to be used 213 * as the foundation for a custom translator. 214 */ 215 public static final CharSequenceTranslator ESCAPE_HTML4 = 216 new AggregateTranslator( 217 new LookupTranslator(EntityArrays.BASIC_ESCAPE), 218 new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE), 219 new LookupTranslator(EntityArrays.HTML40_EXTENDED_ESCAPE) 220 ); 221 222 /** 223 * Translator object for escaping individual Comma Separated Values. 224 * 225 * While {@link #escapeCsv(String)} is the expected method of use, this 226 * object allows the CSV escaping functionality to be used 227 * as the foundation for a custom translator. 228 */ 229 public static final CharSequenceTranslator ESCAPE_CSV = new CsvTranslators.CsvEscaper(); 230 231 /** 232 * Translator object for escaping Shell command language. 233 * 234 * @see <a href="http://pubs.opengroup.org/onlinepubs/7908799/xcu/chap2.html">Shell Command Language</a> 235 */ 236 public static final CharSequenceTranslator ESCAPE_XSI; 237 static { 238 final Map<CharSequence, CharSequence> escapeXsiMap = new HashMap<>(); 239 escapeXsiMap.put("|", "\\|"); 240 escapeXsiMap.put("&", "\\&"); 241 escapeXsiMap.put(";", "\\;"); 242 escapeXsiMap.put("<", "\\<"); 243 escapeXsiMap.put(">", "\\>"); 244 escapeXsiMap.put("(", "\\("); 245 escapeXsiMap.put(")", "\\)"); 246 escapeXsiMap.put("$", "\\$"); 247 escapeXsiMap.put("`", "\\`"); 248 escapeXsiMap.put("\\", "\\\\"); 249 escapeXsiMap.put("\"", "\\\""); 250 escapeXsiMap.put("'", "\\'"); 251 escapeXsiMap.put(" ", "\\ "); 252 escapeXsiMap.put("\t", "\\\t"); 253 escapeXsiMap.put("\r\n", StringUtils.EMPTY); 254 escapeXsiMap.put("\n", StringUtils.EMPTY); 255 escapeXsiMap.put("*", "\\*"); 256 escapeXsiMap.put("?", "\\?"); 257 escapeXsiMap.put("[", "\\["); 258 escapeXsiMap.put("#", "\\#"); 259 escapeXsiMap.put("~", "\\~"); 260 escapeXsiMap.put("=", "\\="); 261 escapeXsiMap.put("%", "\\%"); 262 ESCAPE_XSI = new LookupTranslator( 263 Collections.unmodifiableMap(escapeXsiMap) 264 ); 265 } 266 267 /* UNESCAPE TRANSLATORS */ 268 269 /** 270 * Translator object for unescaping escaped Java. 271 * 272 * While {@link #unescapeJava(String)} is the expected method of use, this 273 * object allows the Java unescaping functionality to be used 274 * as the foundation for a custom translator. 275 */ 276 public static final CharSequenceTranslator UNESCAPE_JAVA; 277 static { 278 final Map<CharSequence, CharSequence> unescapeJavaMap = new HashMap<>(); 279 unescapeJavaMap.put("\\\\", "\\"); 280 unescapeJavaMap.put("\\\"", "\""); 281 unescapeJavaMap.put("\\'", "'"); 282 unescapeJavaMap.put("\\", StringUtils.EMPTY); 283 UNESCAPE_JAVA = new AggregateTranslator( 284 new OctalUnescaper(), // .between('\1', '\377'), 285 new UnicodeUnescaper(), 286 new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_UNESCAPE), 287 new LookupTranslator(Collections.unmodifiableMap(unescapeJavaMap)) 288 ); 289 } 290 291 /** 292 * Translator object for unescaping escaped EcmaScript. 293 * 294 * While {@link #unescapeEcmaScript(String)} is the expected method of use, this 295 * object allows the EcmaScript unescaping functionality to be used 296 * as the foundation for a custom translator. 297 */ 298 public static final CharSequenceTranslator UNESCAPE_ECMASCRIPT = UNESCAPE_JAVA; 299 300 /** 301 * Translator object for unescaping escaped Json. 302 * 303 * While {@link #unescapeJson(String)} is the expected method of use, this 304 * object allows the Json unescaping functionality to be used 305 * as the foundation for a custom translator. 306 */ 307 public static final CharSequenceTranslator UNESCAPE_JSON = UNESCAPE_JAVA; 308 309 /** 310 * Translator object for unescaping escaped HTML 3.0. 311 * 312 * While {@link #unescapeHtml3(String)} is the expected method of use, this 313 * object allows the HTML unescaping functionality to be used 314 * as the foundation for a custom translator. 315 */ 316 public static final CharSequenceTranslator UNESCAPE_HTML3 = 317 new AggregateTranslator( 318 new LookupTranslator(EntityArrays.BASIC_UNESCAPE), 319 new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE), 320 new NumericEntityUnescaper() 321 ); 322 323 /** 324 * Translator object for unescaping escaped HTML 4.0. 325 * 326 * While {@link #unescapeHtml4(String)} is the expected method of use, this 327 * object allows the HTML unescaping functionality to be used 328 * as the foundation for a custom translator. 329 */ 330 public static final CharSequenceTranslator UNESCAPE_HTML4 = 331 new AggregateTranslator( 332 new LookupTranslator(EntityArrays.BASIC_UNESCAPE), 333 new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE), 334 new LookupTranslator(EntityArrays.HTML40_EXTENDED_UNESCAPE), 335 new NumericEntityUnescaper() 336 ); 337 338 /** 339 * Translator object for unescaping escaped XML. 340 * 341 * While {@link #unescapeXml(String)} is the expected method of use, this 342 * object allows the XML unescaping functionality to be used 343 * as the foundation for a custom translator. 344 */ 345 public static final CharSequenceTranslator UNESCAPE_XML = 346 new AggregateTranslator( 347 new LookupTranslator(EntityArrays.BASIC_UNESCAPE), 348 new LookupTranslator(EntityArrays.APOS_UNESCAPE), 349 new NumericEntityUnescaper() 350 ); 351 352 /** 353 * Translator object for unescaping escaped Comma Separated Value entries. 354 * 355 * While {@link #unescapeCsv(String)} is the expected method of use, this 356 * object allows the CSV unescaping functionality to be used 357 * as the foundation for a custom translator. 358 */ 359 public static final CharSequenceTranslator UNESCAPE_CSV = new CsvTranslators.CsvUnescaper(); 360 361 /** 362 * Translator object for unescaping escaped XSI Value entries. 363 * 364 * While {@link #unescapeXSI(String)} is the expected method of use, this 365 * object allows the XSI unescaping functionality to be used 366 * as the foundation for a custom translator. 367 */ 368 public static final CharSequenceTranslator UNESCAPE_XSI = new XsiUnescaper(); 369 370 /** 371 * Translator object for unescaping backslash escaped entries. 372 */ 373 static class XsiUnescaper extends CharSequenceTranslator { 374 375 /** 376 * Escaped backslash constant. 377 */ 378 private static final char BACKSLASH = '\\'; 379 380 @Override 381 public int translate(final CharSequence input, final int index, final Writer out) throws IOException { 382 383 if (index != 0) { 384 throw new IllegalStateException("XsiUnescaper should never reach the [1] index"); 385 } 386 387 final String s = input.toString(); 388 389 int segmentStart = 0; 390 int searchOffset = 0; 391 while (true) { 392 final int pos = s.indexOf(BACKSLASH, searchOffset); 393 if (pos == -1) { 394 if (segmentStart < s.length()) { 395 out.write(s.substring(segmentStart)); 396 } 397 break; 398 } 399 if (pos > segmentStart) { 400 out.write(s.substring(segmentStart, pos)); 401 } 402 segmentStart = pos + 1; 403 searchOffset = pos + 2; 404 } 405 406 return Character.codePointCount(input, 0, input.length()); 407 } 408 } 409 410 /* Helper functions */ 411 412 /** 413 * <p>{@code StringEscapeUtils} instances should NOT be constructed in 414 * standard programming.</p> 415 * 416 * <p>Instead, the class should be used as:</p> 417 * <pre>StringEscapeUtils.escapeJava("foo");</pre> 418 * 419 * <p>This constructor is public to permit tools that require a JavaBean 420 * instance to operate.</p> 421 */ 422 public StringEscapeUtils() { 423 super(); 424 } 425 426 /** 427 * <p>Convenience wrapper for {@link java.lang.StringBuilder} providing escape methods.</p> 428 * 429 * <p>Example:</p> 430 * <pre> 431 * new Builder(ESCAPE_HTML4) 432 * .append("<p>") 433 * .escape("This is paragraph 1 and special chars like & get escaped.") 434 * .append("</p><p>") 435 * .escape("This is paragraph 2 & more...") 436 * .append("</p>") 437 * .toString() 438 * </pre> 439 * 440 */ 441 public static final class Builder { 442 443 /** 444 * StringBuilder to be used in the Builder class. 445 */ 446 private final StringBuilder sb; 447 448 /** 449 * CharSequenceTranslator to be used in the Builder class. 450 */ 451 private final CharSequenceTranslator translator; 452 453 /** 454 * Builder constructor. 455 * 456 * @param translator a CharSequenceTranslator. 457 */ 458 private Builder(final CharSequenceTranslator translator) { 459 this.sb = new StringBuilder(); 460 this.translator = translator; 461 } 462 463 /** 464 * <p>Escape {@code input} according to the given {@link CharSequenceTranslator}.</p> 465 * 466 * @param input the String to escape 467 * @return {@code this}, to enable chaining 468 */ 469 public Builder escape(final String input) { 470 sb.append(translator.translate(input)); 471 return this; 472 } 473 474 /** 475 * Literal append, no escaping being done. 476 * 477 * @param input the String to append 478 * @return {@code this}, to enable chaining 479 */ 480 public Builder append(final String input) { 481 sb.append(input); 482 return this; 483 } 484 485 /** 486 * <p>Return the escaped string.</p> 487 * 488 * @return The escaped string 489 */ 490 @Override 491 public String toString() { 492 return sb.toString(); 493 } 494 } 495 496 /** 497 * Get a {@link Builder}. 498 * @param translator the text translator 499 * @return {@link Builder} 500 */ 501 public static StringEscapeUtils.Builder builder(final CharSequenceTranslator translator) { 502 return new Builder(translator); 503 } 504 505 // Java and JavaScript 506 //-------------------------------------------------------------------------- 507 /** 508 * <p>Escapes the characters in a {@code String} using Java String rules.</p> 509 * 510 * <p>Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p> 511 * 512 * <p>So a tab becomes the characters {@code '\\'} and 513 * {@code 't'}.</p> 514 * 515 * <p>The only difference between Java strings and JavaScript strings 516 * is that in JavaScript, a single quote and forward-slash (/) are escaped.</p> 517 * 518 * <p>Example:</p> 519 * <pre> 520 * input string: He didn't say, "Stop!" 521 * output string: He didn't say, \"Stop!\" 522 * </pre> 523 * 524 * @param input String to escape values in, may be null 525 * @return String with escaped values, {@code null} if null string input 526 */ 527 public static final String escapeJava(final String input) { 528 return ESCAPE_JAVA.translate(input); 529 } 530 531 /** 532 * <p>Escapes the characters in a {@code String} using EcmaScript String rules.</p> 533 * <p>Escapes any values it finds into their EcmaScript String form. 534 * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p> 535 * 536 * <p>So a tab becomes the characters {@code '\\'} and 537 * {@code 't'}.</p> 538 * 539 * <p>The only difference between Java strings and EcmaScript strings 540 * is that in EcmaScript, a single quote and forward-slash (/) are escaped.</p> 541 * 542 * <p>Note that EcmaScript is best known by the JavaScript and ActionScript dialects.</p> 543 * 544 * <p>Example:</p> 545 * <pre> 546 * input string: He didn't say, "Stop!" 547 * output string: He didn\'t say, \"Stop!\" 548 * </pre> 549 * 550 * <b>Security Note.</b> We only provide backslash escaping in this method. For example, {@code '\"'} has the output 551 * {@code '\\\"'} which could result in potential issues in the case where the string being escaped is being used 552 * in an HTML tag like {@code <select onmouseover="..." />}. If you wish to have more rigorous string escaping, you 553 * may consider the 554 * <a href="https://www.owasp.org/index.php/Category:OWASP_Enterprise_Security_API_JAVA">ESAPI Libraries</a>. 555 * Further, you can view the <a href="https://github.com/esapi">ESAPI GitHub Org</a>. 556 * 557 * @param input String to escape values in, may be null 558 * @return String with escaped values, {@code null} if null string input 559 */ 560 public static final String escapeEcmaScript(final String input) { 561 return ESCAPE_ECMASCRIPT.translate(input); 562 } 563 564 /** 565 * <p>Escapes the characters in a {@code String} using Json String rules.</p> 566 * <p>Escapes any values it finds into their Json String form. 567 * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p> 568 * 569 * <p>So a tab becomes the characters {@code '\\'} and 570 * {@code 't'}.</p> 571 * 572 * <p>The only difference between Java strings and Json strings 573 * is that in Json, forward-slash (/) is escaped.</p> 574 * 575 * <p>See http://www.ietf.org/rfc/rfc4627.txt for further details.</p> 576 * 577 * <p>Example:</p> 578 * <pre> 579 * input string: He didn't say, "Stop!" 580 * output string: He didn't say, \"Stop!\" 581 * </pre> 582 * 583 * @param input String to escape values in, may be null 584 * @return String with escaped values, {@code null} if null string input 585 */ 586 public static final String escapeJson(final String input) { 587 return ESCAPE_JSON.translate(input); 588 } 589 590 /** 591 * <p>Unescapes any Java literals found in the {@code String}. 592 * For example, it will turn a sequence of {@code '\'} and 593 * {@code 'n'} into a newline character, unless the {@code '\'} 594 * is preceded by another {@code '\'}.</p> 595 * 596 * @param input the {@code String} to unescape, may be null 597 * @return a new unescaped {@code String}, {@code null} if null string input 598 */ 599 public static final String unescapeJava(final String input) { 600 return UNESCAPE_JAVA.translate(input); 601 } 602 603 /** 604 * <p>Unescapes any EcmaScript literals found in the {@code String}.</p> 605 * 606 * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'} 607 * into a newline character, unless the {@code '\'} is preceded by another 608 * {@code '\'}.</p> 609 * 610 * @see #unescapeJava(String) 611 * @param input the {@code String} to unescape, may be null 612 * @return A new unescaped {@code String}, {@code null} if null string input 613 */ 614 public static final String unescapeEcmaScript(final String input) { 615 return UNESCAPE_ECMASCRIPT.translate(input); 616 } 617 618 /** 619 * <p>Unescapes any Json literals found in the {@code String}.</p> 620 * 621 * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'} 622 * into a newline character, unless the {@code '\'} is preceded by another 623 * {@code '\'}.</p> 624 * 625 * @see #unescapeJava(String) 626 * @param input the {@code String} to unescape, may be null 627 * @return A new unescaped {@code String}, {@code null} if null string input 628 */ 629 public static final String unescapeJson(final String input) { 630 return UNESCAPE_JSON.translate(input); 631 } 632 633 // HTML and XML 634 //-------------------------------------------------------------------------- 635 /** 636 * <p>Escapes the characters in a {@code String} using HTML entities.</p> 637 * 638 * <p> 639 * For example: 640 * </p> 641 * <p>{@code "bread" & "butter"}</p> 642 * becomes: 643 * <p> 644 * {@code &quot;bread&quot; &amp; &quot;butter&quot;}. 645 * </p> 646 * 647 * <p>Supports all known HTML 4.0 entities, including funky accents. 648 * Note that the commonly used apostrophe escape character (&apos;) 649 * is not a legal entity and so is not supported).</p> 650 * 651 * @param input the {@code String} to escape, may be null 652 * @return a new escaped {@code String}, {@code null} if null string input 653 * 654 * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a> 655 * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a> 656 * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a> 657 * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a> 658 * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a> 659 */ 660 public static final String escapeHtml4(final String input) { 661 return ESCAPE_HTML4.translate(input); 662 } 663 664 /** 665 * <p>Escapes the characters in a {@code String} using HTML entities.</p> 666 * <p>Supports only the HTML 3.0 entities.</p> 667 * 668 * @param input the {@code String} to escape, may be null 669 * @return a new escaped {@code String}, {@code null} if null string input 670 */ 671 public static final String escapeHtml3(final String input) { 672 return ESCAPE_HTML3.translate(input); 673 } 674 675 //----------------------------------------------------------------------- 676 /** 677 * <p>Unescapes a string containing entity escapes to a string 678 * containing the actual Unicode characters corresponding to the 679 * escapes. Supports HTML 4.0 entities.</p> 680 * 681 * <p>For example, the string {@code "<Français>"} 682 * will become {@code "<Fran�ais>"}</p> 683 * 684 * <p>If an entity is unrecognized, it is left alone, and inserted 685 * verbatim into the result string. e.g. {@code ">&zzzz;x"} will 686 * become {@code ">&zzzz;x"}.</p> 687 * 688 * @param input the {@code String} to unescape, may be null 689 * @return a new unescaped {@code String}, {@code null} if null string input 690 */ 691 public static final String unescapeHtml4(final String input) { 692 return UNESCAPE_HTML4.translate(input); 693 } 694 695 /** 696 * <p>Unescapes a string containing entity escapes to a string 697 * containing the actual Unicode characters corresponding to the 698 * escapes. Supports only HTML 3.0 entities.</p> 699 * 700 * @param input the {@code String} to unescape, may be null 701 * @return a new unescaped {@code String}, {@code null} if null string input 702 */ 703 public static final String unescapeHtml3(final String input) { 704 return UNESCAPE_HTML3.translate(input); 705 } 706 707 /** 708 * <p>Escapes the characters in a {@code String} using XML entities.</p> 709 * 710 * <p>For example: {@code "bread" & "butter"} => 711 * {@code "bread" & "butter"}. 712 * </p> 713 * 714 * <p>Note that XML 1.0 is a text-only format: it cannot represent control 715 * characters or unpaired Unicode surrogate codepoints, even after escaping. 716 * {@code escapeXml10} will remove characters that do not fit in the 717 * following ranges:</p> 718 * 719 * <p>{@code #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p> 720 * 721 * <p>Though not strictly necessary, {@code escapeXml10} will escape 722 * characters in the following ranges:</p> 723 * 724 * <p>{@code [#x7F-#x84] | [#x86-#x9F]}</p> 725 * 726 * <p>The returned string can be inserted into a valid XML 1.0 or XML 1.1 727 * document. If you want to allow more non-text characters in an XML 1.1 728 * document, use {@link #escapeXml11(String)}.</p> 729 * 730 * @param input the {@code String} to escape, may be null 731 * @return a new escaped {@code String}, {@code null} if null string input 732 * @see #unescapeXml(java.lang.String) 733 */ 734 public static String escapeXml10(final String input) { 735 return ESCAPE_XML10.translate(input); 736 } 737 738 /** 739 * <p>Escapes the characters in a {@code String} using XML entities.</p> 740 * 741 * <p>For example: {@code "bread" & "butter"} => 742 * {@code "bread" & "butter"}. 743 * </p> 744 * 745 * <p>XML 1.1 can represent certain control characters, but it cannot represent 746 * the null byte or unpaired Unicode surrogate codepoints, even after escaping. 747 * {@code escapeXml11} will remove characters that do not fit in the following 748 * ranges:</p> 749 * 750 * <p>{@code [#x1-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p> 751 * 752 * <p>{@code escapeXml11} will escape characters in the following ranges:</p> 753 * 754 * <p>{@code [#x1-#x8] | [#xB-#xC] | [#xE-#x1F] | [#x7F-#x84] | [#x86-#x9F]}</p> 755 * 756 * <p>The returned string can be inserted into a valid XML 1.1 document. Do not 757 * use it for XML 1.0 documents.</p> 758 * 759 * @param input the {@code String} to escape, may be null 760 * @return a new escaped {@code String}, {@code null} if null string input 761 * @see #unescapeXml(java.lang.String) 762 */ 763 public static String escapeXml11(final String input) { 764 return ESCAPE_XML11.translate(input); 765 } 766 767 //----------------------------------------------------------------------- 768 /** 769 * <p>Unescapes a string containing XML entity escapes to a string 770 * containing the actual Unicode characters corresponding to the 771 * escapes.</p> 772 * 773 * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos). 774 * Does not support DTDs or external entities.</p> 775 * 776 * <p>Note that numerical \\u Unicode codes are unescaped to their respective 777 * Unicode characters. This may change in future releases.</p> 778 * 779 * @param input the {@code String} to unescape, may be null 780 * @return a new unescaped {@code String}, {@code null} if null string input 781 * @see #escapeXml10(String) 782 * @see #escapeXml11(String) 783 */ 784 public static final String unescapeXml(final String input) { 785 return UNESCAPE_XML.translate(input); 786 } 787 788 //----------------------------------------------------------------------- 789 790 /** 791 * <p>Returns a {@code String} value for a CSV column enclosed in double quotes, 792 * if required.</p> 793 * 794 * <p>If the value contains a comma, newline or double quote, then the 795 * String value is returned enclosed in double quotes.</p> 796 * 797 * <p>Any double quote characters in the value are escaped with another double quote.</p> 798 * 799 * <p>If the value does not contain a comma, newline or double quote, then the 800 * String value is returned unchanged.</p> 801 * 802 * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and 803 * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>. 804 * 805 * @param input the input CSV column String, may be null 806 * @return The input String, enclosed in double quotes if the value contains a comma, 807 * newline or double quote, {@code null} if null string input 808 */ 809 public static final String escapeCsv(final String input) { 810 return ESCAPE_CSV.translate(input); 811 } 812 813 /** 814 * <p>Returns a {@code String} value for an unescaped CSV column.</p> 815 * 816 * <p>If the value is enclosed in double quotes, and contains a comma, newline 817 * or double quote, then quotes are removed. 818 * </p> 819 * 820 * <p>Any double quote escaped characters (a pair of double quotes) are unescaped 821 * to just one double quote.</p> 822 * 823 * <p>If the value is not enclosed in double quotes, or is and does not contain a 824 * comma, newline or double quote, then the String value is returned unchanged.</p> 825 * 826 * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and 827 * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>. 828 * 829 * @param input the input CSV column String, may be null 830 * @return The input String, with enclosing double quotes removed and embedded double 831 * quotes unescaped, {@code null} if null string input 832 */ 833 public static final String unescapeCsv(final String input) { 834 return UNESCAPE_CSV.translate(input); 835 } 836 837 /** 838 * <p>Escapes the characters in a {@code String} using XSI rules.</p> 839 * 840 * <p><b>Beware!</b> In most cases you don't want to escape shell commands but use multi-argument 841 * methods provided by {@link java.lang.ProcessBuilder} or {@link java.lang.Runtime#exec(String[])} 842 * instead.</p> 843 * 844 * <p>Example:</p> 845 * <pre> 846 * input string: He didn't say, "Stop!" 847 * output string: He\ didn\'t\ say,\ \"Stop!\" 848 * </pre> 849 * 850 * @see <a href="http://pubs.opengroup.org/onlinepubs/7908799/xcu/chap2.html">Shell Command Language</a> 851 * @param input String to escape values in, may be null 852 * @return String with escaped values, {@code null} if null string input 853 */ 854 public static final String escapeXSI(final String input) { 855 return ESCAPE_XSI.translate(input); 856 } 857 858 /** 859 * <p>Unescapes the characters in a {@code String} using XSI rules.</p> 860 * 861 * @see StringEscapeUtils#escapeXSI(String) 862 * @param input the {@code String} to unescape, may be null 863 * @return a new unescaped {@code String}, {@code null} if null string input 864 */ 865 public static final String unescapeXSI(final String input) { 866 return UNESCAPE_XSI.translate(input); 867 } 868 869}