1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 package org.apache.commons.text; 18 19 import org.apache.commons.lang3.CharUtils; 20 import org.apache.commons.lang3.StringUtils; 21 import org.apache.commons.text.translate.AggregateTranslator; 22 import org.apache.commons.text.translate.CharSequenceTranslator; 23 import org.apache.commons.text.translate.EntityArrays; 24 import org.apache.commons.text.translate.JavaUnicodeEscaper; 25 import org.apache.commons.text.translate.LookupTranslator; 26 import org.apache.commons.text.translate.NumericEntityEscaper; 27 import org.apache.commons.text.translate.NumericEntityUnescaper; 28 import org.apache.commons.text.translate.OctalUnescaper; 29 import org.apache.commons.text.translate.SingleLookupTranslator; 30 import org.apache.commons.text.translate.UnicodeUnescaper; 31 import org.apache.commons.text.translate.UnicodeUnpairedSurrogateRemover; 32 33 import java.io.IOException; 34 import java.io.Writer; 35 36 /** 37 * <p>Escapes and unescapes {@code String}s for 38 * Java, Java Script, HTML and XML.</p> 39 * 40 * <p>#ThreadSafe#</p> 41 * 42 * 43 * <p> 44 * This code has been adapted from Apache Commons Lang 3.5. 45 * </p> 46 * 47 * @since 1.0 48 */ 49 public class StringEscapeUtils { 50 51 /* ESCAPE TRANSLATORS */ 52 53 /** 54 * Translator object for escaping Java. 55 * 56 * While {@link #escapeJava(String)} is the expected method of use, this 57 * object allows the Java escaping functionality to be used 58 * as the foundation for a custom translator. 59 */ 60 public static final CharSequenceTranslator ESCAPE_JAVA = 61 new LookupTranslator( 62 new String[][] { 63 {"\"", "\\\""}, 64 {"\\", "\\\\"}, 65 }).with( 66 new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()) 67 ).with( 68 JavaUnicodeEscaper.outsideOf(32, 0x7f) 69 ); 70 71 /** 72 * Translator object for escaping EcmaScript/JavaScript. 73 * 74 * While {@link #escapeEcmaScript(String)} is the expected method of use, this 75 * object allows the EcmaScript escaping functionality to be used 76 * as the foundation for a custom translator. 77 */ 78 public static final CharSequenceTranslator ESCAPE_ECMASCRIPT = 79 new AggregateTranslator( 80 new LookupTranslator( 81 new String[][] { 82 {"'", "\\'"}, 83 {"\"", "\\\""}, 84 {"\\", "\\\\"}, 85 {"/", "\\/"} 86 }), 87 new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()), 88 JavaUnicodeEscaper.outsideOf(32, 0x7f) 89 ); 90 91 /** 92 * Translator object for escaping Json. 93 * 94 * While {@link #escapeJson(String)} is the expected method of use, this 95 * object allows the Json escaping functionality to be used 96 * as the foundation for a custom translator. 97 */ 98 public static final CharSequenceTranslator ESCAPE_JSON = 99 new AggregateTranslator( 100 new LookupTranslator( 101 new String[][] { 102 {"\"", "\\\""}, 103 {"\\", "\\\\"}, 104 {"/", "\\/"} 105 }), 106 new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()), 107 JavaUnicodeEscaper.outsideOf(32, 0x7f) 108 ); 109 110 /** 111 * Translator object for escaping XML 1.0. 112 * 113 * While {@link #escapeXml10(String)} is the expected method of use, this 114 * object allows the XML escaping functionality to be used 115 * as the foundation for a custom translator. 116 */ 117 public static final CharSequenceTranslator ESCAPE_XML10 = 118 new AggregateTranslator( 119 new LookupTranslator(EntityArrays.BASIC_ESCAPE()), 120 new LookupTranslator(EntityArrays.APOS_ESCAPE()), 121 new LookupTranslator( 122 new String[][] { 123 { "\u0000", StringUtils.EMPTY }, 124 { "\u0001", StringUtils.EMPTY }, 125 { "\u0002", StringUtils.EMPTY }, 126 { "\u0003", StringUtils.EMPTY }, 127 { "\u0004", StringUtils.EMPTY }, 128 { "\u0005", StringUtils.EMPTY }, 129 { "\u0006", StringUtils.EMPTY }, 130 { "\u0007", StringUtils.EMPTY }, 131 { "\u0008", StringUtils.EMPTY }, 132 { "\u000b", StringUtils.EMPTY }, 133 { "\u000c", StringUtils.EMPTY }, 134 { "\u000e", StringUtils.EMPTY }, 135 { "\u000f", StringUtils.EMPTY }, 136 { "\u0010", StringUtils.EMPTY }, 137 { "\u0011", StringUtils.EMPTY }, 138 { "\u0012", StringUtils.EMPTY }, 139 { "\u0013", StringUtils.EMPTY }, 140 { "\u0014", StringUtils.EMPTY }, 141 { "\u0015", StringUtils.EMPTY }, 142 { "\u0016", StringUtils.EMPTY }, 143 { "\u0017", StringUtils.EMPTY }, 144 { "\u0018", StringUtils.EMPTY }, 145 { "\u0019", StringUtils.EMPTY }, 146 { "\u001a", StringUtils.EMPTY }, 147 { "\u001b", StringUtils.EMPTY }, 148 { "\u001c", StringUtils.EMPTY }, 149 { "\u001d", StringUtils.EMPTY }, 150 { "\u001e", StringUtils.EMPTY }, 151 { "\u001f", StringUtils.EMPTY }, 152 { "\ufffe", StringUtils.EMPTY }, 153 { "\uffff", StringUtils.EMPTY } 154 }), 155 NumericEntityEscaper.between(0x7f, 0x84), 156 NumericEntityEscaper.between(0x86, 0x9f), 157 new UnicodeUnpairedSurrogateRemover() 158 ); 159 160 /** 161 * Translator object for escaping XML 1.1. 162 * 163 * While {@link #escapeXml11(String)} is the expected method of use, this 164 * object allows the XML escaping functionality to be used 165 * as the foundation for a custom translator. 166 */ 167 public static final CharSequenceTranslator ESCAPE_XML11 = 168 new AggregateTranslator( 169 new LookupTranslator(EntityArrays.BASIC_ESCAPE()), 170 new LookupTranslator(EntityArrays.APOS_ESCAPE()), 171 new LookupTranslator( 172 new String[][] { 173 { "\u0000", StringUtils.EMPTY }, 174 { "\u000b", "" }, 175 { "\u000c", "" }, 176 { "\ufffe", StringUtils.EMPTY }, 177 { "\uffff", StringUtils.EMPTY } 178 }), 179 NumericEntityEscaper.between(0x1, 0x8), 180 NumericEntityEscaper.between(0xe, 0x1f), 181 NumericEntityEscaper.between(0x7f, 0x84), 182 NumericEntityEscaper.between(0x86, 0x9f), 183 new UnicodeUnpairedSurrogateRemover() 184 ); 185 186 /** 187 * Translator object for escaping HTML version 3.0. 188 * 189 * While {@link #escapeHtml3(String)} is the expected method of use, this 190 * object allows the HTML escaping functionality to be used 191 * as the foundation for a custom translator. 192 */ 193 public static final CharSequenceTranslator ESCAPE_HTML3 = 194 new AggregateTranslator( 195 new LookupTranslator(EntityArrays.BASIC_ESCAPE()), 196 new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE()) 197 ); 198 199 /** 200 * The improved translator object for escaping HTML version 3.0. 201 * The 'improved' part of this translator is that it checks if the html is already translated. 202 * This check prevents double, triple, or recursive translations. 203 * 204 * While {@link #escapeHtml3Once(String)} is the expected method of use, this 205 * object allows the HTML escaping functionality to be used 206 * as the foundation for a custom translator. 207 * 208 * Note that, multiple lookup tables should be passed to this translator 209 * instead of passing multiple instances of this translator to the 210 * AggregateTranslator. Because, a SingleLookupTranslator only checks the values of the 211 * lookup table passed to that instance while deciding whether a value is 212 * already translated or not. 213 */ 214 public static final CharSequenceTranslator ESCAPE_HTML3_ONCE = 215 new SingleLookupTranslator(EntityArrays.BASIC_ESCAPE(), EntityArrays.ISO8859_1_ESCAPE()); 216 217 218 /** 219 * Translator object for escaping HTML version 4.0. 220 * 221 * While {@link #escapeHtml4(String)} is the expected method of use, this 222 * object allows the HTML escaping functionality to be used 223 * as the foundation for a custom translator. 224 */ 225 public static final CharSequenceTranslator ESCAPE_HTML4 = 226 new AggregateTranslator( 227 new LookupTranslator(EntityArrays.BASIC_ESCAPE()), 228 new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE()), 229 new LookupTranslator(EntityArrays.HTML40_EXTENDED_ESCAPE()) 230 ); 231 232 /** 233 * The improved translator object for escaping HTML version 4.0. 234 * The 'improved' part of this translator is that it checks if the html is already translated. 235 * This check prevents double, triple, or recursive translations. 236 * 237 * While {@link #escapeHtml4Once(String)} is the expected method of use, this 238 * object allows the HTML escaping functionality to be used 239 * as the foundation for a custom translator. 240 * 241 * Note that, multiple lookup tables should be passed to this translator 242 * instead of passing multiple instances of this translator to the 243 * AggregateTranslator. Because, a SingleLookupTranslator only checks the values of the 244 * lookup table passed to that instance while deciding whether a value is 245 * already translated or not. 246 */ 247 public static final CharSequenceTranslator ESCAPE_HTML4_ONCE = 248 new SingleLookupTranslator(EntityArrays.BASIC_ESCAPE(), EntityArrays.ISO8859_1_ESCAPE(), EntityArrays.HTML40_EXTENDED_ESCAPE()); 249 250 /** 251 * Translator object for escaping individual Comma Separated Values. 252 * 253 * While {@link #escapeCsv(String)} is the expected method of use, this 254 * object allows the CSV escaping functionality to be used 255 * as the foundation for a custom translator. 256 */ 257 public static final CharSequenceTranslator ESCAPE_CSV = new CsvEscaper(); 258 259 // TODO: Create a parent class - 'SinglePassTranslator' ? 260 // It would handle the index checking + length returning, 261 // and could also have an optimization check method. 262 static class CsvEscaper extends CharSequenceTranslator { 263 264 private static final char CSV_DELIMITER = ','; 265 private static final char CSV_QUOTE = '"'; 266 private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE); 267 private static final char[] CSV_SEARCH_CHARS = 268 new char[] {CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF}; 269 270 @Override 271 public int translate(final CharSequence input, final int index, final Writer out) throws IOException { 272 273 if(index != 0) { 274 throw new IllegalStateException("CsvEscaper should never reach the [1] index"); 275 } 276 277 if (StringUtils.containsNone(input.toString(), CSV_SEARCH_CHARS)) { 278 out.write(input.toString()); 279 } else { 280 out.write(CSV_QUOTE); 281 out.write(StringUtils.replace(input.toString(), CSV_QUOTE_STR, CSV_QUOTE_STR + CSV_QUOTE_STR)); 282 out.write(CSV_QUOTE); 283 } 284 return Character.codePointCount(input, 0, input.length()); 285 } 286 } 287 288 /** 289 * Translator object for escaping Shell command language. 290 * 291 * @see <a href="http://pubs.opengroup.org/onlinepubs/7908799/xcu/chap2.html">Shell Command Language</a> 292 */ 293 public static final CharSequenceTranslator ESCAPE_XSI = 294 new LookupTranslator( 295 new String[][] { 296 {"|", "\\|"}, 297 {"&", "\\&"}, 298 {";", "\\;"}, 299 {"<", "\\<"}, 300 {">", "\\>"}, 301 {"(", "\\("}, 302 {")", "\\)"}, 303 {"$", "\\$"}, 304 {"`", "\\`"}, 305 {"\\", "\\\\"}, 306 {"\"", "\\\""}, 307 {"'", "\\'"}, 308 {" ", "\\ "}, 309 {"\t", "\\\t"}, 310 {"\r\n", ""}, 311 {"\n", ""}, 312 {"*", "\\*"}, 313 {"?", "\\?"}, 314 {"[", "\\["}, 315 {"#", "\\#"}, 316 {"~", "\\~"}, 317 {"=", "\\="}, 318 {"%", "\\%"}, 319 }); 320 321 /* UNESCAPE TRANSLATORS */ 322 323 /** 324 * Translator object for unescaping escaped Java. 325 * 326 * While {@link #unescapeJava(String)} is the expected method of use, this 327 * object allows the Java unescaping functionality to be used 328 * as the foundation for a custom translator. 329 */ 330 // TODO: throw "illegal character: \92" as an Exception if a \ on the end of the Java (as per the compiler)? 331 public static final CharSequenceTranslator UNESCAPE_JAVA = 332 new AggregateTranslator( 333 new OctalUnescaper(), // .between('\1', '\377'), 334 new UnicodeUnescaper(), 335 new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_UNESCAPE()), 336 new LookupTranslator( 337 new String[][] { 338 {"\\\\", "\\"}, 339 {"\\\"", "\""}, 340 {"\\'", "'"}, 341 {"\\", ""} 342 }) 343 ); 344 345 /** 346 * Translator object for unescaping escaped EcmaScript. 347 * 348 * While {@link #unescapeEcmaScript(String)} is the expected method of use, this 349 * object allows the EcmaScript unescaping functionality to be used 350 * as the foundation for a custom translator. 351 */ 352 public static final CharSequenceTranslator UNESCAPE_ECMASCRIPT = UNESCAPE_JAVA; 353 354 /** 355 * Translator object for unescaping escaped Json. 356 * 357 * While {@link #unescapeJson(String)} is the expected method of use, this 358 * object allows the Json unescaping functionality to be used 359 * as the foundation for a custom translator. 360 */ 361 public static final CharSequenceTranslator UNESCAPE_JSON = UNESCAPE_JAVA; 362 363 /** 364 * Translator object for unescaping escaped HTML 3.0. 365 * 366 * While {@link #unescapeHtml3(String)} is the expected method of use, this 367 * object allows the HTML unescaping functionality to be used 368 * as the foundation for a custom translator. 369 */ 370 public static final CharSequenceTranslator UNESCAPE_HTML3 = 371 new AggregateTranslator( 372 new LookupTranslator(EntityArrays.BASIC_UNESCAPE()), 373 new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()), 374 new NumericEntityUnescaper() 375 ); 376 377 /** 378 * Translator object for unescaping escaped HTML 4.0. 379 * 380 * While {@link #unescapeHtml4(String)} is the expected method of use, this 381 * object allows the HTML unescaping functionality to be used 382 * as the foundation for a custom translator. 383 */ 384 public static final CharSequenceTranslator UNESCAPE_HTML4 = 385 new AggregateTranslator( 386 new LookupTranslator(EntityArrays.BASIC_UNESCAPE()), 387 new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()), 388 new LookupTranslator(EntityArrays.HTML40_EXTENDED_UNESCAPE()), 389 new NumericEntityUnescaper() 390 ); 391 392 /** 393 * Translator object for unescaping escaped XML. 394 * 395 * While {@link #unescapeXml(String)} is the expected method of use, this 396 * object allows the XML unescaping functionality to be used 397 * as the foundation for a custom translator. 398 */ 399 public static final CharSequenceTranslator UNESCAPE_XML = 400 new AggregateTranslator( 401 new LookupTranslator(EntityArrays.BASIC_UNESCAPE()), 402 new LookupTranslator(EntityArrays.APOS_UNESCAPE()), 403 new NumericEntityUnescaper() 404 ); 405 406 /** 407 * Translator object for unescaping escaped Comma Separated Value entries. 408 * 409 * While {@link #unescapeCsv(String)} is the expected method of use, this 410 * object allows the CSV unescaping functionality to be used 411 * as the foundation for a custom translator. 412 */ 413 public static final CharSequenceTranslator UNESCAPE_CSV = new CsvUnescaper(); 414 415 static class CsvUnescaper extends CharSequenceTranslator { 416 417 private static final char CSV_DELIMITER = ','; 418 private static final char CSV_QUOTE = '"'; 419 private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE); 420 private static final char[] CSV_SEARCH_CHARS = 421 new char[] {CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF}; 422 423 @Override 424 public int translate(final CharSequence input, final int index, final Writer out) throws IOException { 425 426 if(index != 0) { 427 throw new IllegalStateException("CsvUnescaper should never reach the [1] index"); 428 } 429 430 if ( input.charAt(0) != CSV_QUOTE || input.charAt(input.length() - 1) != CSV_QUOTE ) { 431 out.write(input.toString()); 432 return Character.codePointCount(input, 0, input.length()); 433 } 434 435 // strip quotes 436 final String quoteless = input.subSequence(1, input.length() - 1).toString(); 437 438 if ( StringUtils.containsAny(quoteless, CSV_SEARCH_CHARS) ) { 439 // deal with escaped quotes; ie) "" 440 out.write(StringUtils.replace(quoteless, CSV_QUOTE_STR + CSV_QUOTE_STR, CSV_QUOTE_STR)); 441 } else { 442 out.write(input.toString()); 443 } 444 return Character.codePointCount(input, 0, input.length()); 445 } 446 } 447 448 public static final CharSequenceTranslator UNESCAPE_XSI = new XsiUnescaper(); 449 450 /** 451 * Translator object for unescaping backslash escaped entries. 452 */ 453 static class XsiUnescaper extends CharSequenceTranslator { 454 455 private static final char BACKSLASH = '\\'; 456 457 @Override 458 public int translate(final CharSequence input, final int index, final Writer out) throws IOException { 459 460 if(index != 0) { 461 throw new IllegalStateException("XsiUnescaper should never reach the [1] index"); 462 } 463 464 String s = input.toString(); 465 466 int segmentStart = 0; 467 int searchOffset = 0; 468 while (true) { 469 int pos = s.indexOf(BACKSLASH, searchOffset); 470 if (pos == -1) { 471 if (segmentStart < s.length()) { 472 out.write(s.substring(segmentStart)); 473 } 474 break; 475 } 476 if (pos > segmentStart) { 477 out.write(s.substring(segmentStart, pos)); 478 } 479 segmentStart = pos + 1; 480 searchOffset = pos + 2; 481 } 482 483 return Character.codePointCount(input, 0, input.length()); 484 } 485 } 486 487 /* Helper functions */ 488 489 /** 490 * <p>{@code StringEscapeUtils} instances should NOT be constructed in 491 * standard programming.</p> 492 * 493 * <p>Instead, the class should be used as:</p> 494 * <pre>StringEscapeUtils.escapeJava("foo");</pre> 495 * 496 * <p>This constructor is public to permit tools that require a JavaBean 497 * instance to operate.</p> 498 */ 499 public StringEscapeUtils() { 500 super(); 501 } 502 503 /** 504 * <p>Convenience wrapper for {@link java.lang.StringBuilder} providing escape methods.</p> 505 * 506 * <p>Example:</p> 507 * <pre> 508 * new Builder(ESCAPE_HTML4) 509 * .append("<p>") 510 * .escape("This is paragraph 1 and special chars like & get escaped.") 511 * .append("</p><p>") 512 * .escape("This is paragraph 2 & more...") 513 * .append("</p>") 514 * .toString() 515 * </pre> 516 * 517 */ 518 public static class Builder { 519 520 private final StringBuilder sb; 521 private final CharSequenceTranslator translator; 522 523 private Builder(final CharSequenceTranslator translator) { 524 this.sb = new StringBuilder(); 525 this.translator = translator; 526 } 527 528 /** 529 * <p>Escape {@code input} according to the given {@link CharSequenceTranslator}.</p> 530 * 531 * @param input the String to escape 532 * @return {@code this}, to enable chaining 533 */ 534 public Builder escape(final String input) { 535 sb.append(translator.translate(input)); 536 return this; 537 } 538 539 /** 540 * Literal append, no escaping being done. 541 * 542 * @param input the String to append 543 * @return {@code this}, to enable chaining 544 */ 545 public Builder append(final String input) { 546 sb.append(input); 547 return this; 548 } 549 550 /** 551 * <p>Return the escaped string.</p> 552 * 553 * @return the escaped string 554 */ 555 @Override 556 public String toString() { 557 return sb.toString(); 558 } 559 } 560 561 /** 562 * Get a {@link Builder}. 563 * @param translator the text translator 564 * @return {@link Builder} 565 */ 566 public static StringEscapeUtils.Builder builder(final CharSequenceTranslator translator) { 567 return new Builder(translator); 568 } 569 570 // Java and JavaScript 571 //-------------------------------------------------------------------------- 572 /** 573 * <p>Escapes the characters in a {@code String} using Java String rules.</p> 574 * 575 * <p>Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p> 576 * 577 * <p>So a tab becomes the characters {@code '\\'} and 578 * {@code 't'}.</p> 579 * 580 * <p>The only difference between Java strings and JavaScript strings 581 * is that in JavaScript, a single quote and forward-slash (/) are escaped.</p> 582 * 583 * <p>Example:</p> 584 * <pre> 585 * input string: He didn't say, "Stop!" 586 * output string: He didn't say, \"Stop!\" 587 * </pre> 588 * 589 * @param input String to escape values in, may be null 590 * @return String with escaped values, {@code null} if null string input 591 */ 592 public static final String escapeJava(final String input) { 593 return ESCAPE_JAVA.translate(input); 594 } 595 596 /** 597 * <p>Escapes the characters in a {@code String} using EcmaScript String rules.</p> 598 * <p>Escapes any values it finds into their EcmaScript String form. 599 * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p> 600 * 601 * <p>So a tab becomes the characters {@code '\\'} and 602 * {@code 't'}.</p> 603 * 604 * <p>The only difference between Java strings and EcmaScript strings 605 * is that in EcmaScript, a single quote and forward-slash (/) are escaped.</p> 606 * 607 * <p>Note that EcmaScript is best known by the JavaScript and ActionScript dialects. </p> 608 * 609 * <p>Example:</p> 610 * <pre> 611 * input string: He didn't say, "Stop!" 612 * output string: He didn\'t say, \"Stop!\" 613 * </pre> 614 * 615 * <b>Security Note.</b> We only provide backslash escaping in this method. For example, {@code '\"'} has the output 616 * {@code '\\\"'} which could result in potential issues in the case where the string being escaped is being used 617 * in an HTML tag like {@code <select onmouseover="..." />}. If you wish to have more rigorous string escaping, you 618 * may consider the 619 * <a href="https://www.owasp.org/index.php/Category:OWASP_Enterprise_Security_API_JAVA">ESAPI Libraries</a>. Further, 620 * you can view the 621 * <a href="https://github.com/esapi">ESAPI GitHub Org</a>. 622 * 623 * @param input String to escape values in, may be null 624 * @return String with escaped values, {@code null} if null string input 625 */ 626 public static final String escapeEcmaScript(final String input) { 627 return ESCAPE_ECMASCRIPT.translate(input); 628 } 629 630 /** 631 * <p>Escapes the characters in a {@code String} using Json String rules.</p> 632 * <p>Escapes any values it finds into their Json String form. 633 * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p> 634 * 635 * <p>So a tab becomes the characters {@code '\\'} and 636 * {@code 't'}.</p> 637 * 638 * <p>The only difference between Java strings and Json strings 639 * is that in Json, forward-slash (/) is escaped.</p> 640 * 641 * <p>See http://www.ietf.org/rfc/rfc4627.txt for further details. </p> 642 * 643 * <p>Example:</p> 644 * <pre> 645 * input string: He didn't say, "Stop!" 646 * output string: He didn't say, \"Stop!\" 647 * </pre> 648 * 649 * @param input String to escape values in, may be null 650 * @return String with escaped values, {@code null} if null string input 651 */ 652 public static final String escapeJson(final String input) { 653 return ESCAPE_JSON.translate(input); 654 } 655 656 /** 657 * <p>Unescapes any Java literals found in the {@code String}. 658 * For example, it will turn a sequence of {@code '\'} and 659 * {@code 'n'} into a newline character, unless the {@code '\'} 660 * is preceded by another {@code '\'}.</p> 661 * 662 * @param input the {@code String} to unescape, may be null 663 * @return a new unescaped {@code String}, {@code null} if null string input 664 */ 665 public static final String unescapeJava(final String input) { 666 return UNESCAPE_JAVA.translate(input); 667 } 668 669 /** 670 * <p>Unescapes any EcmaScript literals found in the {@code String}.</p> 671 * 672 * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'} 673 * into a newline character, unless the {@code '\'} is preceded by another 674 * {@code '\'}.</p> 675 * 676 * @see #unescapeJava(String) 677 * @param input the {@code String} to unescape, may be null 678 * @return A new unescaped {@code String}, {@code null} if null string input 679 */ 680 public static final String unescapeEcmaScript(final String input) { 681 return UNESCAPE_ECMASCRIPT.translate(input); 682 } 683 684 /** 685 * <p>Unescapes any Json literals found in the {@code String}.</p> 686 * 687 * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'} 688 * into a newline character, unless the {@code '\'} is preceded by another 689 * {@code '\'}.</p> 690 * 691 * @see #unescapeJava(String) 692 * @param input the {@code String} to unescape, may be null 693 * @return A new unescaped {@code String}, {@code null} if null string input 694 */ 695 public static final String unescapeJson(final String input) { 696 return UNESCAPE_JSON.translate(input); 697 } 698 699 // HTML and XML 700 //-------------------------------------------------------------------------- 701 /** 702 * <p>Escapes the characters in a {@code String} using HTML entities.</p> 703 * 704 * <p> 705 * For example: 706 * </p> 707 * <p><code>"bread" & "butter"</code></p> 708 * becomes: 709 * <p> 710 * <code>&quot;bread&quot; &amp; &quot;butter&quot;</code>. 711 * </p> 712 * 713 * <p>Supports all known HTML 4.0 entities, including funky accents. 714 * Note that the commonly used apostrophe escape character (&apos;) 715 * is not a legal entity and so is not supported). </p> 716 * 717 * @param input the {@code String} to escape, may be null 718 * @return a new escaped {@code String}, {@code null} if null string input 719 * 720 * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a> 721 * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a> 722 * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a> 723 * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a> 724 * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a> 725 */ 726 public static final String escapeHtml4(final String input) { 727 return ESCAPE_HTML4.translate(input); 728 } 729 730 /** 731 * <p>Escapes the characters in a {@code String} using HTML entities. 732 * But escapes them only once. i.e. does not escape already escaped characters.</p> 733 * 734 * <p> 735 * For example: 736 * </p> 737 * <p><code>"bread" & "butter"</code></p> 738 * becomes: 739 * <p> 740 * <code>&quot;bread&quot; &amp; &quot;butter&quot;</code>. 741 * </p> 742 * 743 * <p> 744 * But: 745 * </p> 746 * <p><code>&quot;bread&quot; &amp; &quot;butter&quot;</code></p> 747 * remains unaffected. 748 * 749 * <p>Supports all known HTML 4.0 entities, including funky accents. 750 * Note that the commonly used apostrophe escape character (&apos;) 751 * is not a legal entity and so is not supported). </p> 752 * 753 * @param input the {@code String} to escape, may be null 754 * @return a new escaped {@code String}, {@code null} if null string input 755 * 756 * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a> 757 * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a> 758 * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a> 759 * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a> 760 * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a> 761 */ 762 public static final String escapeHtml4Once(final String input) { 763 return ESCAPE_HTML4_ONCE.translate(input); 764 } 765 766 767 /** 768 * <p>Escapes the characters in a {@code String} using HTML entities.</p> 769 * <p>Supports only the HTML 3.0 entities. </p> 770 * 771 * @param input the {@code String} to escape, may be null 772 * @return a new escaped {@code String}, {@code null} if null string input 773 */ 774 public static final String escapeHtml3(final String input) { 775 return ESCAPE_HTML3.translate(input); 776 } 777 778 /** 779 * <p>Escapes the characters in a {@code String} using HTML entities. 780 * But escapes them only once. i.e. does not escape already escaped characters.</p> 781 * <p>Supports only the HTML 3.0 entities. </p> 782 * 783 * @param input the {@code String} to escape, may be null 784 * @return a new escaped {@code String}, {@code null} if null string input 785 */ 786 public static final String escapeHtml3Once(final String input) { 787 return ESCAPE_HTML3_ONCE.translate(input); 788 } 789 790 //----------------------------------------------------------------------- 791 /** 792 * <p>Unescapes a string containing entity escapes to a string 793 * containing the actual Unicode characters corresponding to the 794 * escapes. Supports HTML 4.0 entities.</p> 795 * 796 * <p>For example, the string {@code "<Français>"} 797 * will become {@code "<Français>"}</p> 798 * 799 * <p>If an entity is unrecognized, it is left alone, and inserted 800 * verbatim into the result string. e.g. {@code ">&zzzz;x"} will 801 * become {@code ">&zzzz;x"}.</p> 802 * 803 * @param input the {@code String} to unescape, may be null 804 * @return a new unescaped {@code String}, {@code null} if null string input 805 */ 806 public static final String unescapeHtml4(final String input) { 807 return UNESCAPE_HTML4.translate(input); 808 } 809 810 /** 811 * <p>Unescapes a string containing entity escapes to a string 812 * containing the actual Unicode characters corresponding to the 813 * escapes. Supports only HTML 3.0 entities.</p> 814 * 815 * @param input the {@code String} to unescape, may be null 816 * @return a new unescaped {@code String}, {@code null} if null string input 817 */ 818 public static final String unescapeHtml3(final String input) { 819 return UNESCAPE_HTML3.translate(input); 820 } 821 822 /** 823 * <p>Escapes the characters in a {@code String} using XML entities.</p> 824 * 825 * <p>For example: {@code "bread" & "butter"} => 826 * {@code "bread" & "butter"}. 827 * </p> 828 * 829 * <p>Note that XML 1.0 is a text-only format: it cannot represent control 830 * characters or unpaired Unicode surrogate codepoints, even after escaping. 831 * {@code escapeXml10} will remove characters that do not fit in the 832 * following ranges:</p> 833 * 834 * <p>{@code #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p> 835 * 836 * <p>Though not strictly necessary, {@code escapeXml10} will escape 837 * characters in the following ranges:</p> 838 * 839 * <p>{@code [#x7F-#x84] | [#x86-#x9F]}</p> 840 * 841 * <p>The returned string can be inserted into a valid XML 1.0 or XML 1.1 842 * document. If you want to allow more non-text characters in an XML 1.1 843 * document, use {@link #escapeXml11(String)}.</p> 844 * 845 * @param input the {@code String} to escape, may be null 846 * @return a new escaped {@code String}, {@code null} if null string input 847 * @see #unescapeXml(java.lang.String) 848 */ 849 public static String escapeXml10(final String input) { 850 return ESCAPE_XML10.translate(input); 851 } 852 853 /** 854 * <p>Escapes the characters in a {@code String} using XML entities.</p> 855 * 856 * <p>For example: {@code "bread" & "butter"} => 857 * {@code "bread" & "butter"}. 858 * </p> 859 * 860 * <p>XML 1.1 can represent certain control characters, but it cannot represent 861 * the null byte or unpaired Unicode surrogate codepoints, even after escaping. 862 * {@code escapeXml11} will remove characters that do not fit in the following 863 * ranges:</p> 864 * 865 * <p>{@code [#x1-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p> 866 * 867 * <p>{@code escapeXml11} will escape characters in the following ranges:</p> 868 * 869 * <p>{@code [#x1-#x8] | [#xB-#xC] | [#xE-#x1F] | [#x7F-#x84] | [#x86-#x9F]}</p> 870 * 871 * <p>The returned string can be inserted into a valid XML 1.1 document. Do not 872 * use it for XML 1.0 documents.</p> 873 * 874 * @param input the {@code String} to escape, may be null 875 * @return a new escaped {@code String}, {@code null} if null string input 876 * @see #unescapeXml(java.lang.String) 877 */ 878 public static String escapeXml11(final String input) { 879 return ESCAPE_XML11.translate(input); 880 } 881 882 //----------------------------------------------------------------------- 883 /** 884 * <p>Unescapes a string containing XML entity escapes to a string 885 * containing the actual Unicode characters corresponding to the 886 * escapes.</p> 887 * 888 * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos). 889 * Does not support DTDs or external entities.</p> 890 * 891 * <p>Note that numerical \\u Unicode codes are unescaped to their respective 892 * Unicode characters. This may change in future releases. </p> 893 * 894 * @param input the {@code String} to unescape, may be null 895 * @return a new unescaped {@code String}, {@code null} if null string input 896 * @see #escapeXml10(String) 897 * @see #escapeXml11(String) 898 */ 899 public static final String unescapeXml(final String input) { 900 return UNESCAPE_XML.translate(input); 901 } 902 903 //----------------------------------------------------------------------- 904 905 /** 906 * <p>Returns a {@code String} value for a CSV column enclosed in double quotes, 907 * if required.</p> 908 * 909 * <p>If the value contains a comma, newline or double quote, then the 910 * String value is returned enclosed in double quotes.</p> 911 * 912 * <p>Any double quote characters in the value are escaped with another double quote.</p> 913 * 914 * <p>If the value does not contain a comma, newline or double quote, then the 915 * String value is returned unchanged.</p> 916 * 917 * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and 918 * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>. 919 * 920 * @param input the input CSV column String, may be null 921 * @return the input String, enclosed in double quotes if the value contains a comma, 922 * newline or double quote, {@code null} if null string input 923 */ 924 public static final String escapeCsv(final String input) { 925 return ESCAPE_CSV.translate(input); 926 } 927 928 /** 929 * <p>Returns a {@code String} value for an unescaped CSV column. </p> 930 * 931 * <p>If the value is enclosed in double quotes, and contains a comma, newline 932 * or double quote, then quotes are removed. 933 * </p> 934 * 935 * <p>Any double quote escaped characters (a pair of double quotes) are unescaped 936 * to just one double quote. </p> 937 * 938 * <p>If the value is not enclosed in double quotes, or is and does not contain a 939 * comma, newline or double quote, then the String value is returned unchanged.</p> 940 * 941 * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and 942 * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>. 943 * 944 * @param input the input CSV column String, may be null 945 * @return the input String, with enclosing double quotes removed and embedded double 946 * quotes unescaped, {@code null} if null string input 947 */ 948 public static final String unescapeCsv(final String input) { 949 return UNESCAPE_CSV.translate(input); 950 } 951 952 /** 953 * <p>Escapes the characters in a {@code String} using XSI rules.</p> 954 * 955 * <p><b>Beware!</b> In most cases you don't want to escape shell commands but use multi-argument 956 * methods provided by {@link java.lang.ProcessBuilder} or {@link java.lang.Runtime#exec(String[])} 957 * instead.</p> 958 * 959 * <p>Example:</p> 960 * <pre> 961 * input string: He didn't say, "Stop!" 962 * output string: He\ didn\'t\ say,\ \"Stop!\" 963 * </pre> 964 * 965 * @see <a href="http://pubs.opengroup.org/onlinepubs/7908799/xcu/chap2.html">Shell Command Language</a> 966 * @param input String to escape values in, may be null 967 * @return String with escaped values, {@code null} if null string input 968 */ 969 public static final String escapeXSI(final String input) { 970 return ESCAPE_XSI.translate(input); 971 } 972 973 /** 974 * <p>Unescapes the characters in a {@code String} using XSI rules.</p> 975 * 976 * @see StringEscapeUtils#escapeXSI(String) 977 * @param input the {@code String} to unescape, may be null 978 * @return a new unescaped {@code String}, {@code null} if null string input 979 */ 980 public static final String unescapeXSI(final String input) { 981 return UNESCAPE_XSI.translate(input); 982 } 983 984 }