001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.lang3; 018 019import java.io.IOException; 020import java.io.Writer; 021 022import org.apache.commons.lang3.text.translate.AggregateTranslator; 023import org.apache.commons.lang3.text.translate.CharSequenceTranslator; 024import org.apache.commons.lang3.text.translate.EntityArrays; 025import org.apache.commons.lang3.text.translate.JavaUnicodeEscaper; 026import org.apache.commons.lang3.text.translate.LookupTranslator; 027import org.apache.commons.lang3.text.translate.NumericEntityEscaper; 028import org.apache.commons.lang3.text.translate.NumericEntityUnescaper; 029import org.apache.commons.lang3.text.translate.OctalUnescaper; 030import org.apache.commons.lang3.text.translate.UnicodeUnescaper; 031import org.apache.commons.lang3.text.translate.UnicodeUnpairedSurrogateRemover; 032 033/** 034 * <p>Escapes and unescapes {@code String}s for 035 * Java, Java Script, HTML and XML.</p> 036 * 037 * <p>#ThreadSafe#</p> 038 * @since 2.0 039 * @version $Id: StringEscapeUtils.java 1568639 2014-02-15 16:13:27Z britter $ 040 */ 041public class StringEscapeUtils { 042 043 /* ESCAPE TRANSLATORS */ 044 045 /** 046 * Translator object for escaping Java. 047 * 048 * While {@link #escapeJava(String)} is the expected method of use, this 049 * object allows the Java escaping functionality to be used 050 * as the foundation for a custom translator. 051 * 052 * @since 3.0 053 */ 054 public static final CharSequenceTranslator ESCAPE_JAVA = 055 new LookupTranslator( 056 new String[][] { 057 {"\"", "\\\""}, 058 {"\\", "\\\\"}, 059 }).with( 060 new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()) 061 ).with( 062 JavaUnicodeEscaper.outsideOf(32, 0x7f) 063 ); 064 065 /** 066 * Translator object for escaping EcmaScript/JavaScript. 067 * 068 * While {@link #escapeEcmaScript(String)} is the expected method of use, this 069 * object allows the EcmaScript escaping functionality to be used 070 * as the foundation for a custom translator. 071 * 072 * @since 3.0 073 */ 074 public static final CharSequenceTranslator ESCAPE_ECMASCRIPT = 075 new AggregateTranslator( 076 new LookupTranslator( 077 new String[][] { 078 {"'", "\\'"}, 079 {"\"", "\\\""}, 080 {"\\", "\\\\"}, 081 {"/", "\\/"} 082 }), 083 new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()), 084 JavaUnicodeEscaper.outsideOf(32, 0x7f) 085 ); 086 087 /** 088 * Translator object for escaping Json. 089 * 090 * While {@link #escapeJson(String)} is the expected method of use, this 091 * object allows the Json escaping functionality to be used 092 * as the foundation for a custom translator. 093 * 094 * @since 3.2 095 */ 096 public static final CharSequenceTranslator ESCAPE_JSON = 097 new AggregateTranslator( 098 new LookupTranslator( 099 new String[][] { 100 {"\"", "\\\""}, 101 {"\\", "\\\\"}, 102 {"/", "\\/"} 103 }), 104 new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()), 105 JavaUnicodeEscaper.outsideOf(32, 0x7f) 106 ); 107 108 /** 109 * Translator object for escaping XML. 110 * 111 * While {@link #escapeXml(String)} is the expected method of use, this 112 * object allows the XML escaping functionality to be used 113 * as the foundation for a custom translator. 114 * 115 * @since 3.0 116 * @deprecated use {@link #ESCAPE_XML10} or {@link #ESCAPE_XML11} instead. 117 */ 118 @Deprecated 119 public static final CharSequenceTranslator ESCAPE_XML = 120 new AggregateTranslator( 121 new LookupTranslator(EntityArrays.BASIC_ESCAPE()), 122 new LookupTranslator(EntityArrays.APOS_ESCAPE()) 123 ); 124 125 /** 126 * Translator object for escaping XML 1.0. 127 * 128 * While {@link #escapeXml10(String)} is the expected method of use, this 129 * object allows the XML escaping functionality to be used 130 * as the foundation for a custom translator. 131 * 132 * @since 3.3 133 */ 134 public static final CharSequenceTranslator ESCAPE_XML10 = 135 new AggregateTranslator( 136 new LookupTranslator(EntityArrays.BASIC_ESCAPE()), 137 new LookupTranslator(EntityArrays.APOS_ESCAPE()), 138 new LookupTranslator( 139 new String[][] { 140 { "\u0000", "" }, 141 { "\u0001", "" }, 142 { "\u0002", "" }, 143 { "\u0003", "" }, 144 { "\u0004", "" }, 145 { "\u0005", "" }, 146 { "\u0006", "" }, 147 { "\u0007", "" }, 148 { "\u0008", "" }, 149 { "\u000b", "" }, 150 { "\u000c", "" }, 151 { "\u000e", "" }, 152 { "\u000f", "" }, 153 { "\u0010", "" }, 154 { "\u0011", "" }, 155 { "\u0012", "" }, 156 { "\u0013", "" }, 157 { "\u0014", "" }, 158 { "\u0015", "" }, 159 { "\u0016", "" }, 160 { "\u0017", "" }, 161 { "\u0018", "" }, 162 { "\u0019", "" }, 163 { "\u001a", "" }, 164 { "\u001b", "" }, 165 { "\u001c", "" }, 166 { "\u001d", "" }, 167 { "\u001e", "" }, 168 { "\u001f", "" }, 169 { "\ufffe", "" }, 170 { "\uffff", "" } 171 }), 172 NumericEntityEscaper.between(0x7f, 0x84), 173 NumericEntityEscaper.between(0x86, 0x9f), 174 new UnicodeUnpairedSurrogateRemover() 175 ); 176 177 /** 178 * Translator object for escaping XML 1.1. 179 * 180 * While {@link #escapeXml11(String)} is the expected method of use, this 181 * object allows the XML escaping functionality to be used 182 * as the foundation for a custom translator. 183 * 184 * @since 3.3 185 */ 186 public static final CharSequenceTranslator ESCAPE_XML11 = 187 new AggregateTranslator( 188 new LookupTranslator(EntityArrays.BASIC_ESCAPE()), 189 new LookupTranslator(EntityArrays.APOS_ESCAPE()), 190 new LookupTranslator( 191 new String[][] { 192 { "\u0000", "" }, 193 { "\u000b", "" }, 194 { "\u000c", "" }, 195 { "\ufffe", "" }, 196 { "\uffff", "" } 197 }), 198 NumericEntityEscaper.between(0x1, 0x8), 199 NumericEntityEscaper.between(0xe, 0x1f), 200 NumericEntityEscaper.between(0x7f, 0x84), 201 NumericEntityEscaper.between(0x86, 0x9f), 202 new UnicodeUnpairedSurrogateRemover() 203 ); 204 205 /** 206 * Translator object for escaping HTML version 3.0. 207 * 208 * While {@link #escapeHtml3(String)} is the expected method of use, this 209 * object allows the HTML escaping functionality to be used 210 * as the foundation for a custom translator. 211 * 212 * @since 3.0 213 */ 214 public static final CharSequenceTranslator ESCAPE_HTML3 = 215 new AggregateTranslator( 216 new LookupTranslator(EntityArrays.BASIC_ESCAPE()), 217 new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE()) 218 ); 219 220 /** 221 * Translator object for escaping HTML version 4.0. 222 * 223 * While {@link #escapeHtml4(String)} is the expected method of use, this 224 * object allows the HTML escaping functionality to be used 225 * as the foundation for a custom translator. 226 * 227 * @since 3.0 228 */ 229 public static final CharSequenceTranslator ESCAPE_HTML4 = 230 new AggregateTranslator( 231 new LookupTranslator(EntityArrays.BASIC_ESCAPE()), 232 new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE()), 233 new LookupTranslator(EntityArrays.HTML40_EXTENDED_ESCAPE()) 234 ); 235 236 /** 237 * Translator object for escaping individual Comma Separated Values. 238 * 239 * While {@link #escapeCsv(String)} is the expected method of use, this 240 * object allows the CSV escaping functionality to be used 241 * as the foundation for a custom translator. 242 * 243 * @since 3.0 244 */ 245 public static final CharSequenceTranslator ESCAPE_CSV = new CsvEscaper(); 246 247 // TODO: Create a parent class - 'SinglePassTranslator' ? 248 // It would handle the index checking + length returning, 249 // and could also have an optimization check method. 250 static class CsvEscaper extends CharSequenceTranslator { 251 252 private static final char CSV_DELIMITER = ','; 253 private static final char CSV_QUOTE = '"'; 254 private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE); 255 private static final char[] CSV_SEARCH_CHARS = 256 new char[] {CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF}; 257 258 @Override 259 public int translate(final CharSequence input, final int index, final Writer out) throws IOException { 260 261 if(index != 0) { 262 throw new IllegalStateException("CsvEscaper should never reach the [1] index"); 263 } 264 265 if (StringUtils.containsNone(input.toString(), CSV_SEARCH_CHARS)) { 266 out.write(input.toString()); 267 } else { 268 out.write(CSV_QUOTE); 269 out.write(StringUtils.replace(input.toString(), CSV_QUOTE_STR, CSV_QUOTE_STR + CSV_QUOTE_STR)); 270 out.write(CSV_QUOTE); 271 } 272 return Character.codePointCount(input, 0, input.length()); 273 } 274 } 275 276 /* UNESCAPE TRANSLATORS */ 277 278 /** 279 * Translator object for unescaping escaped Java. 280 * 281 * While {@link #unescapeJava(String)} is the expected method of use, this 282 * object allows the Java unescaping functionality to be used 283 * as the foundation for a custom translator. 284 * 285 * @since 3.0 286 */ 287 // TODO: throw "illegal character: \92" as an Exception if a \ on the end of the Java (as per the compiler)? 288 public static final CharSequenceTranslator UNESCAPE_JAVA = 289 new AggregateTranslator( 290 new OctalUnescaper(), // .between('\1', '\377'), 291 new UnicodeUnescaper(), 292 new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_UNESCAPE()), 293 new LookupTranslator( 294 new String[][] { 295 {"\\\\", "\\"}, 296 {"\\\"", "\""}, 297 {"\\'", "'"}, 298 {"\\", ""} 299 }) 300 ); 301 302 /** 303 * Translator object for unescaping escaped EcmaScript. 304 * 305 * While {@link #unescapeEcmaScript(String)} is the expected method of use, this 306 * object allows the EcmaScript unescaping functionality to be used 307 * as the foundation for a custom translator. 308 * 309 * @since 3.0 310 */ 311 public static final CharSequenceTranslator UNESCAPE_ECMASCRIPT = UNESCAPE_JAVA; 312 313 /** 314 * Translator object for unescaping escaped Json. 315 * 316 * While {@link #unescapeJson(String)} is the expected method of use, this 317 * object allows the Json unescaping functionality to be used 318 * as the foundation for a custom translator. 319 * 320 * @since 3.2 321 */ 322 public static final CharSequenceTranslator UNESCAPE_JSON = UNESCAPE_JAVA; 323 324 /** 325 * Translator object for unescaping escaped HTML 3.0. 326 * 327 * While {@link #unescapeHtml3(String)} is the expected method of use, this 328 * object allows the HTML unescaping functionality to be used 329 * as the foundation for a custom translator. 330 * 331 * @since 3.0 332 */ 333 public static final CharSequenceTranslator UNESCAPE_HTML3 = 334 new AggregateTranslator( 335 new LookupTranslator(EntityArrays.BASIC_UNESCAPE()), 336 new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()), 337 new NumericEntityUnescaper() 338 ); 339 340 /** 341 * Translator object for unescaping escaped HTML 4.0. 342 * 343 * While {@link #unescapeHtml4(String)} is the expected method of use, this 344 * object allows the HTML unescaping functionality to be used 345 * as the foundation for a custom translator. 346 * 347 * @since 3.0 348 */ 349 public static final CharSequenceTranslator UNESCAPE_HTML4 = 350 new AggregateTranslator( 351 new LookupTranslator(EntityArrays.BASIC_UNESCAPE()), 352 new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()), 353 new LookupTranslator(EntityArrays.HTML40_EXTENDED_UNESCAPE()), 354 new NumericEntityUnescaper() 355 ); 356 357 /** 358 * Translator object for unescaping escaped XML. 359 * 360 * While {@link #unescapeXml(String)} is the expected method of use, this 361 * object allows the XML unescaping functionality to be used 362 * as the foundation for a custom translator. 363 * 364 * @since 3.0 365 */ 366 public static final CharSequenceTranslator UNESCAPE_XML = 367 new AggregateTranslator( 368 new LookupTranslator(EntityArrays.BASIC_UNESCAPE()), 369 new LookupTranslator(EntityArrays.APOS_UNESCAPE()), 370 new NumericEntityUnescaper() 371 ); 372 373 /** 374 * Translator object for unescaping escaped Comma Separated Value entries. 375 * 376 * While {@link #unescapeCsv(String)} is the expected method of use, this 377 * object allows the CSV unescaping functionality to be used 378 * as the foundation for a custom translator. 379 * 380 * @since 3.0 381 */ 382 public static final CharSequenceTranslator UNESCAPE_CSV = new CsvUnescaper(); 383 384 static class CsvUnescaper extends CharSequenceTranslator { 385 386 private static final char CSV_DELIMITER = ','; 387 private static final char CSV_QUOTE = '"'; 388 private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE); 389 private static final char[] CSV_SEARCH_CHARS = 390 new char[] {CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF}; 391 392 @Override 393 public int translate(final CharSequence input, final int index, final Writer out) throws IOException { 394 395 if(index != 0) { 396 throw new IllegalStateException("CsvUnescaper should never reach the [1] index"); 397 } 398 399 if ( input.charAt(0) != CSV_QUOTE || input.charAt(input.length() - 1) != CSV_QUOTE ) { 400 out.write(input.toString()); 401 return Character.codePointCount(input, 0, input.length()); 402 } 403 404 // strip quotes 405 final String quoteless = input.subSequence(1, input.length() - 1).toString(); 406 407 if ( StringUtils.containsAny(quoteless, CSV_SEARCH_CHARS) ) { 408 // deal with escaped quotes; ie) "" 409 out.write(StringUtils.replace(quoteless, CSV_QUOTE_STR + CSV_QUOTE_STR, CSV_QUOTE_STR)); 410 } else { 411 out.write(input.toString()); 412 } 413 return Character.codePointCount(input, 0, input.length()); 414 } 415 } 416 417 /* Helper functions */ 418 419 /** 420 * <p>{@code StringEscapeUtils} instances should NOT be constructed in 421 * standard programming.</p> 422 * 423 * <p>Instead, the class should be used as: 424 * <pre>StringEscapeUtils.escapeJava("foo");</pre></p> 425 * 426 * <p>This constructor is public to permit tools that require a JavaBean 427 * instance to operate.</p> 428 */ 429 public StringEscapeUtils() { 430 super(); 431 } 432 433 // Java and JavaScript 434 //-------------------------------------------------------------------------- 435 /** 436 * <p>Escapes the characters in a {@code String} using Java String rules.</p> 437 * 438 * <p>Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p> 439 * 440 * <p>So a tab becomes the characters {@code '\\'} and 441 * {@code 't'}.</p> 442 * 443 * <p>The only difference between Java strings and JavaScript strings 444 * is that in JavaScript, a single quote and forward-slash (/) are escaped.</p> 445 * 446 * <p>Example: 447 * <pre> 448 * input string: He didn't say, "Stop!" 449 * output string: He didn't say, \"Stop!\" 450 * </pre> 451 * </p> 452 * 453 * @param input String to escape values in, may be null 454 * @return String with escaped values, {@code null} if null string input 455 */ 456 public static final String escapeJava(final String input) { 457 return ESCAPE_JAVA.translate(input); 458 } 459 460 /** 461 * <p>Escapes the characters in a {@code String} using EcmaScript String rules.</p> 462 * <p>Escapes any values it finds into their EcmaScript String form. 463 * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p> 464 * 465 * <p>So a tab becomes the characters {@code '\\'} and 466 * {@code 't'}.</p> 467 * 468 * <p>The only difference between Java strings and EcmaScript strings 469 * is that in EcmaScript, a single quote and forward-slash (/) are escaped.</p> 470 * 471 * <p>Note that EcmaScript is best known by the JavaScript and ActionScript dialects. </p> 472 * 473 * <p>Example: 474 * <pre> 475 * input string: He didn't say, "Stop!" 476 * output string: He didn\'t say, \"Stop!\" 477 * </pre> 478 * </p> 479 * 480 * @param input String to escape values in, may be null 481 * @return String with escaped values, {@code null} if null string input 482 * 483 * @since 3.0 484 */ 485 public static final String escapeEcmaScript(final String input) { 486 return ESCAPE_ECMASCRIPT.translate(input); 487 } 488 489 /** 490 * <p>Escapes the characters in a {@code String} using Json String rules.</p> 491 * <p>Escapes any values it finds into their Json String form. 492 * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p> 493 * 494 * <p>So a tab becomes the characters {@code '\\'} and 495 * {@code 't'}.</p> 496 * 497 * <p>The only difference between Java strings and Json strings 498 * is that in Json, forward-slash (/) is escaped.</p> 499 * 500 * <p>See http://www.ietf.org/rfc/rfc4627.txt for further details. </p> 501 * 502 * <p>Example: 503 * <pre> 504 * input string: He didn't say, "Stop!" 505 * output string: He didn't say, \"Stop!\" 506 * </pre> 507 * </p> 508 * 509 * @param input String to escape values in, may be null 510 * @return String with escaped values, {@code null} if null string input 511 * 512 * @since 3.2 513 */ 514 public static final String escapeJson(final String input) { 515 return ESCAPE_JSON.translate(input); 516 } 517 518 /** 519 * <p>Unescapes any Java literals found in the {@code String}. 520 * For example, it will turn a sequence of {@code '\'} and 521 * {@code 'n'} into a newline character, unless the {@code '\'} 522 * is preceded by another {@code '\'}.</p> 523 * 524 * @param input the {@code String} to unescape, may be null 525 * @return a new unescaped {@code String}, {@code null} if null string input 526 */ 527 public static final String unescapeJava(final String input) { 528 return UNESCAPE_JAVA.translate(input); 529 } 530 531 /** 532 * <p>Unescapes any EcmaScript literals found in the {@code String}.</p> 533 * 534 * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'} 535 * into a newline character, unless the {@code '\'} is preceded by another 536 * {@code '\'}.</p> 537 * 538 * @see #unescapeJava(String) 539 * @param input the {@code String} to unescape, may be null 540 * @return A new unescaped {@code String}, {@code null} if null string input 541 * 542 * @since 3.0 543 */ 544 public static final String unescapeEcmaScript(final String input) { 545 return UNESCAPE_ECMASCRIPT.translate(input); 546 } 547 548 /** 549 * <p>Unescapes any Json literals found in the {@code String}.</p> 550 * 551 * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'} 552 * into a newline character, unless the {@code '\'} is preceded by another 553 * {@code '\'}.</p> 554 * 555 * @see #unescapeJava(String) 556 * @param input the {@code String} to unescape, may be null 557 * @return A new unescaped {@code String}, {@code null} if null string input 558 * 559 * @since 3.2 560 */ 561 public static final String unescapeJson(final String input) { 562 return UNESCAPE_JSON.translate(input); 563 } 564 565 // HTML and XML 566 //-------------------------------------------------------------------------- 567 /** 568 * <p>Escapes the characters in a {@code String} using HTML entities.</p> 569 * 570 * <p> 571 * For example: 572 * </p> 573 * <p><code>"bread" & "butter"</code></p> 574 * becomes: 575 * <p> 576 * <code>&quot;bread&quot; &amp; &quot;butter&quot;</code>. 577 * </p> 578 * 579 * <p>Supports all known HTML 4.0 entities, including funky accents. 580 * Note that the commonly used apostrophe escape character (&apos;) 581 * is not a legal entity and so is not supported). </p> 582 * 583 * @param input the {@code String} to escape, may be null 584 * @return a new escaped {@code String}, {@code null} if null string input 585 * 586 * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a> 587 * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a> 588 * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a> 589 * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a> 590 * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a> 591 * 592 * @since 3.0 593 */ 594 public static final String escapeHtml4(final String input) { 595 return ESCAPE_HTML4.translate(input); 596 } 597 598 /** 599 * <p>Escapes the characters in a {@code String} using HTML entities.</p> 600 * <p>Supports only the HTML 3.0 entities. </p> 601 * 602 * @param input the {@code String} to escape, may be null 603 * @return a new escaped {@code String}, {@code null} if null string input 604 * 605 * @since 3.0 606 */ 607 public static final String escapeHtml3(final String input) { 608 return ESCAPE_HTML3.translate(input); 609 } 610 611 //----------------------------------------------------------------------- 612 /** 613 * <p>Unescapes a string containing entity escapes to a string 614 * containing the actual Unicode characters corresponding to the 615 * escapes. Supports HTML 4.0 entities.</p> 616 * 617 * <p>For example, the string "&lt;Fran&ccedil;ais&gt;" 618 * will become "<Français>"</p> 619 * 620 * <p>If an entity is unrecognized, it is left alone, and inserted 621 * verbatim into the result string. e.g. "&gt;&zzzz;x" will 622 * become ">&zzzz;x".</p> 623 * 624 * @param input the {@code String} to unescape, may be null 625 * @return a new unescaped {@code String}, {@code null} if null string input 626 * 627 * @since 3.0 628 */ 629 public static final String unescapeHtml4(final String input) { 630 return UNESCAPE_HTML4.translate(input); 631 } 632 633 /** 634 * <p>Unescapes a string containing entity escapes to a string 635 * containing the actual Unicode characters corresponding to the 636 * escapes. Supports only HTML 3.0 entities.</p> 637 * 638 * @param input the {@code String} to unescape, may be null 639 * @return a new unescaped {@code String}, {@code null} if null string input 640 * 641 * @since 3.0 642 */ 643 public static final String unescapeHtml3(final String input) { 644 return UNESCAPE_HTML3.translate(input); 645 } 646 647 //----------------------------------------------------------------------- 648 /** 649 * <p>Escapes the characters in a {@code String} using XML entities.</p> 650 * 651 * <p>For example: <tt>"bread" & "butter"</tt> => 652 * <tt>&quot;bread&quot; &amp; &quot;butter&quot;</tt>. 653 * </p> 654 * 655 * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos). 656 * Does not support DTDs or external entities.</p> 657 * 658 * <p>Note that Unicode characters greater than 0x7f are as of 3.0, no longer 659 * escaped. If you still wish this functionality, you can achieve it 660 * via the following: 661 * {@code StringEscapeUtils.ESCAPE_XML.with( NumericEntityEscaper.between(0x7f, Integer.MAX_VALUE) );}</p> 662 * 663 * @param input the {@code String} to escape, may be null 664 * @return a new escaped {@code String}, {@code null} if null string input 665 * @see #unescapeXml(java.lang.String) 666 * @deprecated use {@link #escapeXml10(java.lang.String)} or {@link #escapeXml11(java.lang.String)} instead. 667 */ 668 @Deprecated 669 @SuppressWarnings( "deprecation" ) // ESCAPE_XML has been replaced by ESCAPE_XML10 and ESCAPE_XML11 in 3.3 670 public static final String escapeXml(final String input) { 671 return ESCAPE_XML.translate(input); 672 } 673 674 /** 675 * <p>Escapes the characters in a {@code String} using XML entities.</p> 676 * 677 * <p>For example: <tt>"bread" & "butter"</tt> => 678 * <tt>&quot;bread&quot; &amp; &quot;butter&quot;</tt>. 679 * </p> 680 * 681 * <p>Note that XML 1.0 is a text-only format: it cannot represent control 682 * characters or unpaired Unicode surrogate codepoints, even after escaping. 683 * {@code escapeXml10} will remove characters that do not fit in the 684 * following ranges:</p> 685 * 686 * <p>{@code #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p> 687 * 688 * <p>Though not strictly necessary, {@code escapeXml10} will escape 689 * characters in the following ranges:</p> 690 * 691 * <p>{@code [#x7F-#x84] | [#x86-#x9F]}</p> 692 * 693 * <p>The returned string can be inserted into a valid XML 1.0 or XML 1.1 694 * document. If you want to allow more non-text characters in an XML 1.1 695 * document, use {@link #escapeXml11(String)}.</p> 696 * 697 * @param input the {@code String} to escape, may be null 698 * @return a new escaped {@code String}, {@code null} if null string input 699 * @see #unescapeXml(java.lang.String) 700 * @since 3.3 701 */ 702 public static String escapeXml10(final String input) { 703 return ESCAPE_XML10.translate(input); 704 } 705 706 /** 707 * <p>Escapes the characters in a {@code String} using XML entities.</p> 708 * 709 * <p>For example: <tt>"bread" & "butter"</tt> => 710 * <tt>&quot;bread&quot; &amp; &quot;butter&quot;</tt>. 711 * </p> 712 * 713 * <p>XML 1.1 can represent certain control characters, but it cannot represent 714 * the null byte or unpaired Unicode surrogate codepoints, even after escaping. 715 * {@code escapeXml11} will remove characters that do not fit in the following 716 * ranges:</p> 717 * 718 * <p>{@code [#x1-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p> 719 * 720 * <p>{@code escapeXml11} will escape characters in the following ranges:</p> 721 * 722 * <p>{@code [#x1-#x8] | [#xB-#xC] | [#xE-#x1F] | [#x7F-#x84] | [#x86-#x9F]}</p> 723 * 724 * <p>The returned string can be inserted into a valid XML 1.1 document. Do not 725 * use it for XML 1.0 documents.</p> 726 * 727 * @param input the {@code String} to escape, may be null 728 * @return a new escaped {@code String}, {@code null} if null string input 729 * @see #unescapeXml(java.lang.String) 730 * @since 3.3 731 */ 732 public static String escapeXml11(final String input) { 733 return ESCAPE_XML11.translate(input); 734 } 735 736 //----------------------------------------------------------------------- 737 /** 738 * <p>Unescapes a string containing XML entity escapes to a string 739 * containing the actual Unicode characters corresponding to the 740 * escapes.</p> 741 * 742 * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos). 743 * Does not support DTDs or external entities.</p> 744 * 745 * <p>Note that numerical \\u Unicode codes are unescaped to their respective 746 * Unicode characters. This may change in future releases. </p> 747 * 748 * @param input the {@code String} to unescape, may be null 749 * @return a new unescaped {@code String}, {@code null} if null string input 750 * @see #escapeXml(String) 751 * @see #escapeXml10(String) 752 * @see #escapeXml11(String) 753 */ 754 public static final String unescapeXml(final String input) { 755 return UNESCAPE_XML.translate(input); 756 } 757 758 //----------------------------------------------------------------------- 759 760 /** 761 * <p>Returns a {@code String} value for a CSV column enclosed in double quotes, 762 * if required.</p> 763 * 764 * <p>If the value contains a comma, newline or double quote, then the 765 * String value is returned enclosed in double quotes.</p> 766 * </p> 767 * 768 * <p>Any double quote characters in the value are escaped with another double quote.</p> 769 * 770 * <p>If the value does not contain a comma, newline or double quote, then the 771 * String value is returned unchanged.</p> 772 * </p> 773 * 774 * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and 775 * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>. 776 * 777 * @param input the input CSV column String, may be null 778 * @return the input String, enclosed in double quotes if the value contains a comma, 779 * newline or double quote, {@code null} if null string input 780 * @since 2.4 781 */ 782 public static final String escapeCsv(final String input) { 783 return ESCAPE_CSV.translate(input); 784 } 785 786 /** 787 * <p>Returns a {@code String} value for an unescaped CSV column. </p> 788 * 789 * <p>If the value is enclosed in double quotes, and contains a comma, newline 790 * or double quote, then quotes are removed. 791 * </p> 792 * 793 * <p>Any double quote escaped characters (a pair of double quotes) are unescaped 794 * to just one double quote. </p> 795 * 796 * <p>If the value is not enclosed in double quotes, or is and does not contain a 797 * comma, newline or double quote, then the String value is returned unchanged.</p> 798 * </p> 799 * 800 * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and 801 * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>. 802 * 803 * @param input the input CSV column String, may be null 804 * @return the input String, with enclosing double quotes removed and embedded double 805 * quotes unescaped, {@code null} if null string input 806 * @since 2.4 807 */ 808 public static final String unescapeCsv(final String input) { 809 return UNESCAPE_CSV.translate(input); 810 } 811 812}