1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.text;
18
19 import org.apache.commons.lang3.CharUtils;
20 import org.apache.commons.lang3.StringUtils;
21 import org.apache.commons.text.translate.AggregateTranslator;
22 import org.apache.commons.text.translate.CharSequenceTranslator;
23 import org.apache.commons.text.translate.EntityArrays;
24 import org.apache.commons.text.translate.JavaUnicodeEscaper;
25 import org.apache.commons.text.translate.LookupTranslator;
26 import org.apache.commons.text.translate.NumericEntityEscaper;
27 import org.apache.commons.text.translate.NumericEntityUnescaper;
28 import org.apache.commons.text.translate.OctalUnescaper;
29 import org.apache.commons.text.translate.SingleLookupTranslator;
30 import org.apache.commons.text.translate.UnicodeUnescaper;
31 import org.apache.commons.text.translate.UnicodeUnpairedSurrogateRemover;
32
33 import java.io.IOException;
34 import java.io.Writer;
35
36 /**
 * <p>Escapes and unescapes {@code String}s for
 * Java, JavaScript, JSON, HTML, XML, CSV and XSI shell syntax.</p>
39 *
40 * <p>#ThreadSafe#</p>
41 *
42 *
43 * <p>
44 * This code has been adapted from Apache Commons Lang 3.5.
45 * </p>
46 *
47 * @since 1.0
48 */
49 public class StringEscapeUtils {
50
51 /* ESCAPE TRANSLATORS */
52
53 /**
54 * Translator object for escaping Java.
55 *
56 * While {@link #escapeJava(String)} is the expected method of use, this
57 * object allows the Java escaping functionality to be used
58 * as the foundation for a custom translator.
59 */
60 public static final CharSequenceTranslator ESCAPE_JAVA =
61 new LookupTranslator(
62 new String[][] {
63 {"\"", "\\\""},
64 {"\\", "\\\\"},
65 }).with(
66 new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE())
67 ).with(
68 JavaUnicodeEscaper.outsideOf(32, 0x7f)
69 );
70
71 /**
72 * Translator object for escaping EcmaScript/JavaScript.
73 *
74 * While {@link #escapeEcmaScript(String)} is the expected method of use, this
75 * object allows the EcmaScript escaping functionality to be used
76 * as the foundation for a custom translator.
77 */
78 public static final CharSequenceTranslator ESCAPE_ECMASCRIPT =
79 new AggregateTranslator(
80 new LookupTranslator(
81 new String[][] {
82 {"'", "\\'"},
83 {"\"", "\\\""},
84 {"\\", "\\\\"},
85 {"/", "\\/"}
86 }),
87 new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()),
88 JavaUnicodeEscaper.outsideOf(32, 0x7f)
89 );
90
91 /**
92 * Translator object for escaping Json.
93 *
94 * While {@link #escapeJson(String)} is the expected method of use, this
95 * object allows the Json escaping functionality to be used
96 * as the foundation for a custom translator.
97 */
98 public static final CharSequenceTranslator ESCAPE_JSON =
99 new AggregateTranslator(
100 new LookupTranslator(
101 new String[][] {
102 {"\"", "\\\""},
103 {"\\", "\\\\"},
104 {"/", "\\/"}
105 }),
106 new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()),
107 JavaUnicodeEscaper.outsideOf(32, 0x7f)
108 );
109
110 /**
111 * Translator object for escaping XML 1.0.
112 *
113 * While {@link #escapeXml10(String)} is the expected method of use, this
114 * object allows the XML escaping functionality to be used
115 * as the foundation for a custom translator.
116 */
117 public static final CharSequenceTranslator ESCAPE_XML10 =
118 new AggregateTranslator(
119 new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
120 new LookupTranslator(EntityArrays.APOS_ESCAPE()),
121 new LookupTranslator(
122 new String[][] {
123 { "\u0000", StringUtils.EMPTY },
124 { "\u0001", StringUtils.EMPTY },
125 { "\u0002", StringUtils.EMPTY },
126 { "\u0003", StringUtils.EMPTY },
127 { "\u0004", StringUtils.EMPTY },
128 { "\u0005", StringUtils.EMPTY },
129 { "\u0006", StringUtils.EMPTY },
130 { "\u0007", StringUtils.EMPTY },
131 { "\u0008", StringUtils.EMPTY },
132 { "\u000b", StringUtils.EMPTY },
133 { "\u000c", StringUtils.EMPTY },
134 { "\u000e", StringUtils.EMPTY },
135 { "\u000f", StringUtils.EMPTY },
136 { "\u0010", StringUtils.EMPTY },
137 { "\u0011", StringUtils.EMPTY },
138 { "\u0012", StringUtils.EMPTY },
139 { "\u0013", StringUtils.EMPTY },
140 { "\u0014", StringUtils.EMPTY },
141 { "\u0015", StringUtils.EMPTY },
142 { "\u0016", StringUtils.EMPTY },
143 { "\u0017", StringUtils.EMPTY },
144 { "\u0018", StringUtils.EMPTY },
145 { "\u0019", StringUtils.EMPTY },
146 { "\u001a", StringUtils.EMPTY },
147 { "\u001b", StringUtils.EMPTY },
148 { "\u001c", StringUtils.EMPTY },
149 { "\u001d", StringUtils.EMPTY },
150 { "\u001e", StringUtils.EMPTY },
151 { "\u001f", StringUtils.EMPTY },
152 { "\ufffe", StringUtils.EMPTY },
153 { "\uffff", StringUtils.EMPTY }
154 }),
155 NumericEntityEscaper.between(0x7f, 0x84),
156 NumericEntityEscaper.between(0x86, 0x9f),
157 new UnicodeUnpairedSurrogateRemover()
158 );
159
160 /**
161 * Translator object for escaping XML 1.1.
162 *
163 * While {@link #escapeXml11(String)} is the expected method of use, this
164 * object allows the XML escaping functionality to be used
165 * as the foundation for a custom translator.
166 */
167 public static final CharSequenceTranslator ESCAPE_XML11 =
168 new AggregateTranslator(
169 new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
170 new LookupTranslator(EntityArrays.APOS_ESCAPE()),
171 new LookupTranslator(
172 new String[][] {
173 { "\u0000", StringUtils.EMPTY },
174 { "\u000b", "" },
175 { "\u000c", "" },
176 { "\ufffe", StringUtils.EMPTY },
177 { "\uffff", StringUtils.EMPTY }
178 }),
179 NumericEntityEscaper.between(0x1, 0x8),
180 NumericEntityEscaper.between(0xe, 0x1f),
181 NumericEntityEscaper.between(0x7f, 0x84),
182 NumericEntityEscaper.between(0x86, 0x9f),
183 new UnicodeUnpairedSurrogateRemover()
184 );
185
186 /**
187 * Translator object for escaping HTML version 3.0.
188 *
189 * While {@link #escapeHtml3(String)} is the expected method of use, this
190 * object allows the HTML escaping functionality to be used
191 * as the foundation for a custom translator.
192 */
193 public static final CharSequenceTranslator ESCAPE_HTML3 =
194 new AggregateTranslator(
195 new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
196 new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE())
197 );
198
199 /**
200 * The improved translator object for escaping HTML version 3.0.
201 * The 'improved' part of this translator is that it checks if the html is already translated.
202 * This check prevents double, triple, or recursive translations.
203 *
204 * While {@link #escapeHtml3Once(String)} is the expected method of use, this
205 * object allows the HTML escaping functionality to be used
206 * as the foundation for a custom translator.
207 *
208 * Note that, multiple lookup tables should be passed to this translator
209 * instead of passing multiple instances of this translator to the
210 * AggregateTranslator. Because, a SingleLookupTranslator only checks the values of the
211 * lookup table passed to that instance while deciding whether a value is
212 * already translated or not.
213 */
214 public static final CharSequenceTranslator ESCAPE_HTML3_ONCE =
215 new SingleLookupTranslator(EntityArrays.BASIC_ESCAPE(), EntityArrays.ISO8859_1_ESCAPE());
216
217
218 /**
219 * Translator object for escaping HTML version 4.0.
220 *
221 * While {@link #escapeHtml4(String)} is the expected method of use, this
222 * object allows the HTML escaping functionality to be used
223 * as the foundation for a custom translator.
224 */
225 public static final CharSequenceTranslator ESCAPE_HTML4 =
226 new AggregateTranslator(
227 new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
228 new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE()),
229 new LookupTranslator(EntityArrays.HTML40_EXTENDED_ESCAPE())
230 );
231
232 /**
233 * The improved translator object for escaping HTML version 4.0.
234 * The 'improved' part of this translator is that it checks if the html is already translated.
235 * This check prevents double, triple, or recursive translations.
236 *
237 * While {@link #escapeHtml4Once(String)} is the expected method of use, this
238 * object allows the HTML escaping functionality to be used
239 * as the foundation for a custom translator.
240 *
241 * Note that, multiple lookup tables should be passed to this translator
242 * instead of passing multiple instances of this translator to the
243 * AggregateTranslator. Because, a SingleLookupTranslator only checks the values of the
244 * lookup table passed to that instance while deciding whether a value is
245 * already translated or not.
246 */
247 public static final CharSequenceTranslator ESCAPE_HTML4_ONCE =
248 new SingleLookupTranslator(EntityArrays.BASIC_ESCAPE(), EntityArrays.ISO8859_1_ESCAPE(), EntityArrays.HTML40_EXTENDED_ESCAPE());
249
250 /**
251 * Translator object for escaping individual Comma Separated Values.
252 *
253 * While {@link #escapeCsv(String)} is the expected method of use, this
254 * object allows the CSV escaping functionality to be used
255 * as the foundation for a custom translator.
256 */
257 public static final CharSequenceTranslator ESCAPE_CSV = new CsvEscaper();
258
259 // TODO: Create a parent class - 'SinglePassTranslator' ?
260 // It would handle the index checking + length returning,
261 // and could also have an optimization check method.
262 static class CsvEscaper extends CharSequenceTranslator {
263
264 private static final char CSV_DELIMITER = ',';
265 private static final char CSV_QUOTE = '"';
266 private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE);
267 private static final char[] CSV_SEARCH_CHARS =
268 new char[] {CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF};
269
270 @Override
271 public int translate(final CharSequence input, final int index, final Writer out) throws IOException {
272
273 if(index != 0) {
274 throw new IllegalStateException("CsvEscaper should never reach the [1] index");
275 }
276
277 if (StringUtils.containsNone(input.toString(), CSV_SEARCH_CHARS)) {
278 out.write(input.toString());
279 } else {
280 out.write(CSV_QUOTE);
281 out.write(StringUtils.replace(input.toString(), CSV_QUOTE_STR, CSV_QUOTE_STR + CSV_QUOTE_STR));
282 out.write(CSV_QUOTE);
283 }
284 return Character.codePointCount(input, 0, input.length());
285 }
286 }
287
288 /**
289 * Translator object for escaping Shell command language.
290 *
291 * @see <a href="http://pubs.opengroup.org/onlinepubs/7908799/xcu/chap2.html">Shell Command Language</a>
292 */
293 public static final CharSequenceTranslator ESCAPE_XSI =
294 new LookupTranslator(
295 new String[][] {
296 {"|", "\\|"},
297 {"&", "\\&"},
298 {";", "\\;"},
299 {"<", "\\<"},
300 {">", "\\>"},
301 {"(", "\\("},
302 {")", "\\)"},
303 {"$", "\\$"},
304 {"`", "\\`"},
305 {"\\", "\\\\"},
306 {"\"", "\\\""},
307 {"'", "\\'"},
308 {" ", "\\ "},
309 {"\t", "\\\t"},
310 {"\r\n", ""},
311 {"\n", ""},
312 {"*", "\\*"},
313 {"?", "\\?"},
314 {"[", "\\["},
315 {"#", "\\#"},
316 {"~", "\\~"},
317 {"=", "\\="},
318 {"%", "\\%"},
319 });
320
321 /* UNESCAPE TRANSLATORS */
322
323 /**
324 * Translator object for unescaping escaped Java.
325 *
326 * While {@link #unescapeJava(String)} is the expected method of use, this
327 * object allows the Java unescaping functionality to be used
328 * as the foundation for a custom translator.
329 */
330 // TODO: throw "illegal character: \92" as an Exception if a \ on the end of the Java (as per the compiler)?
331 public static final CharSequenceTranslator UNESCAPE_JAVA =
332 new AggregateTranslator(
333 new OctalUnescaper(), // .between('\1', '\377'),
334 new UnicodeUnescaper(),
335 new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_UNESCAPE()),
336 new LookupTranslator(
337 new String[][] {
338 {"\\\\", "\\"},
339 {"\\\"", "\""},
340 {"\\'", "'"},
341 {"\\", ""}
342 })
343 );
344
345 /**
346 * Translator object for unescaping escaped EcmaScript.
347 *
348 * While {@link #unescapeEcmaScript(String)} is the expected method of use, this
349 * object allows the EcmaScript unescaping functionality to be used
350 * as the foundation for a custom translator.
351 */
352 public static final CharSequenceTranslator UNESCAPE_ECMASCRIPT = UNESCAPE_JAVA;
353
354 /**
355 * Translator object for unescaping escaped Json.
356 *
357 * While {@link #unescapeJson(String)} is the expected method of use, this
358 * object allows the Json unescaping functionality to be used
359 * as the foundation for a custom translator.
360 */
361 public static final CharSequenceTranslator UNESCAPE_JSON = UNESCAPE_JAVA;
362
363 /**
364 * Translator object for unescaping escaped HTML 3.0.
365 *
366 * While {@link #unescapeHtml3(String)} is the expected method of use, this
367 * object allows the HTML unescaping functionality to be used
368 * as the foundation for a custom translator.
369 */
370 public static final CharSequenceTranslator UNESCAPE_HTML3 =
371 new AggregateTranslator(
372 new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
373 new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()),
374 new NumericEntityUnescaper()
375 );
376
377 /**
378 * Translator object for unescaping escaped HTML 4.0.
379 *
380 * While {@link #unescapeHtml4(String)} is the expected method of use, this
381 * object allows the HTML unescaping functionality to be used
382 * as the foundation for a custom translator.
383 */
384 public static final CharSequenceTranslator UNESCAPE_HTML4 =
385 new AggregateTranslator(
386 new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
387 new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()),
388 new LookupTranslator(EntityArrays.HTML40_EXTENDED_UNESCAPE()),
389 new NumericEntityUnescaper()
390 );
391
392 /**
393 * Translator object for unescaping escaped XML.
394 *
395 * While {@link #unescapeXml(String)} is the expected method of use, this
396 * object allows the XML unescaping functionality to be used
397 * as the foundation for a custom translator.
398 */
399 public static final CharSequenceTranslator UNESCAPE_XML =
400 new AggregateTranslator(
401 new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
402 new LookupTranslator(EntityArrays.APOS_UNESCAPE()),
403 new NumericEntityUnescaper()
404 );
405
406 /**
407 * Translator object for unescaping escaped Comma Separated Value entries.
408 *
409 * While {@link #unescapeCsv(String)} is the expected method of use, this
410 * object allows the CSV unescaping functionality to be used
411 * as the foundation for a custom translator.
412 */
413 public static final CharSequenceTranslator UNESCAPE_CSV = new CsvUnescaper();
414
415 static class CsvUnescaper extends CharSequenceTranslator {
416
417 private static final char CSV_DELIMITER = ',';
418 private static final char CSV_QUOTE = '"';
419 private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE);
420 private static final char[] CSV_SEARCH_CHARS =
421 new char[] {CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF};
422
423 @Override
424 public int translate(final CharSequence input, final int index, final Writer out) throws IOException {
425
426 if(index != 0) {
427 throw new IllegalStateException("CsvUnescaper should never reach the [1] index");
428 }
429
430 if ( input.charAt(0) != CSV_QUOTE || input.charAt(input.length() - 1) != CSV_QUOTE ) {
431 out.write(input.toString());
432 return Character.codePointCount(input, 0, input.length());
433 }
434
435 // strip quotes
436 final String quoteless = input.subSequence(1, input.length() - 1).toString();
437
438 if ( StringUtils.containsAny(quoteless, CSV_SEARCH_CHARS) ) {
439 // deal with escaped quotes; ie) ""
440 out.write(StringUtils.replace(quoteless, CSV_QUOTE_STR + CSV_QUOTE_STR, CSV_QUOTE_STR));
441 } else {
442 out.write(input.toString());
443 }
444 return Character.codePointCount(input, 0, input.length());
445 }
446 }
447
448 public static final CharSequenceTranslator UNESCAPE_XSI = new XsiUnescaper();
449
450 /**
451 * Translator object for unescaping backslash escaped entries.
452 */
453 static class XsiUnescaper extends CharSequenceTranslator {
454
455 private static final char BACKSLASH = '\\';
456
457 @Override
458 public int translate(final CharSequence input, final int index, final Writer out) throws IOException {
459
460 if(index != 0) {
461 throw new IllegalStateException("XsiUnescaper should never reach the [1] index");
462 }
463
464 String s = input.toString();
465
466 int segmentStart = 0;
467 int searchOffset = 0;
468 while (true) {
469 int pos = s.indexOf(BACKSLASH, searchOffset);
470 if (pos == -1) {
471 if (segmentStart < s.length()) {
472 out.write(s.substring(segmentStart));
473 }
474 break;
475 }
476 if (pos > segmentStart) {
477 out.write(s.substring(segmentStart, pos));
478 }
479 segmentStart = pos + 1;
480 searchOffset = pos + 2;
481 }
482
483 return Character.codePointCount(input, 0, input.length());
484 }
485 }
486
487 /* Helper functions */
488
489 /**
490 * <p>{@code StringEscapeUtils} instances should NOT be constructed in
491 * standard programming.</p>
492 *
493 * <p>Instead, the class should be used as:</p>
494 * <pre>StringEscapeUtils.escapeJava("foo");</pre>
495 *
496 * <p>This constructor is public to permit tools that require a JavaBean
497 * instance to operate.</p>
498 */
499 public StringEscapeUtils() {
500 super();
501 }
502
503 /**
504 * <p>Convenience wrapper for {@link java.lang.StringBuilder} providing escape methods.</p>
505 *
506 * <p>Example:</p>
507 * <pre>
508 * new Builder(ESCAPE_HTML4)
509 * .append("<p>")
510 * .escape("This is paragraph 1 and special chars like & get escaped.")
511 * .append("</p><p>")
512 * .escape("This is paragraph 2 & more...")
513 * .append("</p>")
514 * .toString()
515 * </pre>
516 *
517 */
518 public static class Builder {
519
520 private final StringBuilder sb;
521 private final CharSequenceTranslator translator;
522
523 private Builder(final CharSequenceTranslator translator) {
524 this.sb = new StringBuilder();
525 this.translator = translator;
526 }
527
528 /**
529 * <p>Escape {@code input} according to the given {@link CharSequenceTranslator}.</p>
530 *
531 * @param input the String to escape
532 * @return {@code this}, to enable chaining
533 */
534 public Builder escape(final String input) {
535 sb.append(translator.translate(input));
536 return this;
537 }
538
539 /**
540 * Literal append, no escaping being done.
541 *
542 * @param input the String to append
543 * @return {@code this}, to enable chaining
544 */
545 public Builder append(final String input) {
546 sb.append(input);
547 return this;
548 }
549
550 /**
551 * <p>Return the escaped string.</p>
552 *
553 * @return the escaped string
554 */
555 @Override
556 public String toString() {
557 return sb.toString();
558 }
559 }
560
561 /**
562 * Get a {@link Builder}.
563 * @param translator the text translator
564 * @return {@link Builder}
565 */
566 public static StringEscapeUtils.Builder builder(final CharSequenceTranslator translator) {
567 return new Builder(translator);
568 }
569
570 // Java and JavaScript
571 //--------------------------------------------------------------------------
572 /**
573 * <p>Escapes the characters in a {@code String} using Java String rules.</p>
574 *
575 * <p>Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
576 *
577 * <p>So a tab becomes the characters {@code '\\'} and
578 * {@code 't'}.</p>
579 *
580 * <p>The only difference between Java strings and JavaScript strings
581 * is that in JavaScript, a single quote and forward-slash (/) are escaped.</p>
582 *
583 * <p>Example:</p>
584 * <pre>
585 * input string: He didn't say, "Stop!"
586 * output string: He didn't say, \"Stop!\"
587 * </pre>
588 *
589 * @param input String to escape values in, may be null
590 * @return String with escaped values, {@code null} if null string input
591 */
592 public static final String escapeJava(final String input) {
593 return ESCAPE_JAVA.translate(input);
594 }
595
596 /**
597 * <p>Escapes the characters in a {@code String} using EcmaScript String rules.</p>
598 * <p>Escapes any values it finds into their EcmaScript String form.
599 * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
600 *
601 * <p>So a tab becomes the characters {@code '\\'} and
602 * {@code 't'}.</p>
603 *
604 * <p>The only difference between Java strings and EcmaScript strings
605 * is that in EcmaScript, a single quote and forward-slash (/) are escaped.</p>
606 *
607 * <p>Note that EcmaScript is best known by the JavaScript and ActionScript dialects. </p>
608 *
609 * <p>Example:</p>
610 * <pre>
611 * input string: He didn't say, "Stop!"
612 * output string: He didn\'t say, \"Stop!\"
613 * </pre>
614 *
615 * <b>Security Note.</b> We only provide backslash escaping in this method. For example, {@code '\"'} has the output
616 * {@code '\\\"'} which could result in potential issues in the case where the string being escaped is being used
617 * in an HTML tag like {@code <select onmouseover="..." />}. If you wish to have more rigorous string escaping, you
618 * may consider the
619 * <a href="https://www.owasp.org/index.php/Category:OWASP_Enterprise_Security_API_JAVA">ESAPI Libraries</a>. Further,
620 * you can view the
621 * <a href="https://github.com/esapi">ESAPI GitHub Org</a>.
622 *
623 * @param input String to escape values in, may be null
624 * @return String with escaped values, {@code null} if null string input
625 */
626 public static final String escapeEcmaScript(final String input) {
627 return ESCAPE_ECMASCRIPT.translate(input);
628 }
629
630 /**
631 * <p>Escapes the characters in a {@code String} using Json String rules.</p>
632 * <p>Escapes any values it finds into their Json String form.
633 * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
634 *
635 * <p>So a tab becomes the characters {@code '\\'} and
636 * {@code 't'}.</p>
637 *
638 * <p>The only difference between Java strings and Json strings
639 * is that in Json, forward-slash (/) is escaped.</p>
640 *
641 * <p>See http://www.ietf.org/rfc/rfc4627.txt for further details. </p>
642 *
643 * <p>Example:</p>
644 * <pre>
645 * input string: He didn't say, "Stop!"
646 * output string: He didn't say, \"Stop!\"
647 * </pre>
648 *
649 * @param input String to escape values in, may be null
650 * @return String with escaped values, {@code null} if null string input
651 */
652 public static final String escapeJson(final String input) {
653 return ESCAPE_JSON.translate(input);
654 }
655
656 /**
657 * <p>Unescapes any Java literals found in the {@code String}.
658 * For example, it will turn a sequence of {@code '\'} and
659 * {@code 'n'} into a newline character, unless the {@code '\'}
660 * is preceded by another {@code '\'}.</p>
661 *
662 * @param input the {@code String} to unescape, may be null
663 * @return a new unescaped {@code String}, {@code null} if null string input
664 */
665 public static final String unescapeJava(final String input) {
666 return UNESCAPE_JAVA.translate(input);
667 }
668
669 /**
670 * <p>Unescapes any EcmaScript literals found in the {@code String}.</p>
671 *
672 * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'}
673 * into a newline character, unless the {@code '\'} is preceded by another
674 * {@code '\'}.</p>
675 *
676 * @see #unescapeJava(String)
677 * @param input the {@code String} to unescape, may be null
678 * @return A new unescaped {@code String}, {@code null} if null string input
679 */
680 public static final String unescapeEcmaScript(final String input) {
681 return UNESCAPE_ECMASCRIPT.translate(input);
682 }
683
684 /**
685 * <p>Unescapes any Json literals found in the {@code String}.</p>
686 *
687 * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'}
688 * into a newline character, unless the {@code '\'} is preceded by another
689 * {@code '\'}.</p>
690 *
691 * @see #unescapeJava(String)
692 * @param input the {@code String} to unescape, may be null
693 * @return A new unescaped {@code String}, {@code null} if null string input
694 */
695 public static final String unescapeJson(final String input) {
696 return UNESCAPE_JSON.translate(input);
697 }
698
699 // HTML and XML
700 //--------------------------------------------------------------------------
701 /**
702 * <p>Escapes the characters in a {@code String} using HTML entities.</p>
703 *
704 * <p>
705 * For example:
706 * </p>
707 * <p><code>"bread" & "butter"</code></p>
708 * becomes:
709 * <p>
710 * <code>&quot;bread&quot; &amp; &quot;butter&quot;</code>.
711 * </p>
712 *
713 * <p>Supports all known HTML 4.0 entities, including funky accents.
714 * Note that the commonly used apostrophe escape character (&apos;)
715 * is not a legal entity and so is not supported). </p>
716 *
717 * @param input the {@code String} to escape, may be null
718 * @return a new escaped {@code String}, {@code null} if null string input
719 *
720 * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a>
721 * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a>
722 * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a>
723 * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a>
724 * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a>
725 */
726 public static final String escapeHtml4(final String input) {
727 return ESCAPE_HTML4.translate(input);
728 }
729
730 /**
731 * <p>Escapes the characters in a {@code String} using HTML entities.
732 * But escapes them only once. i.e. does not escape already escaped characters.</p>
733 *
734 * <p>
735 * For example:
736 * </p>
737 * <p><code>"bread" & "butter"</code></p>
738 * becomes:
739 * <p>
740 * <code>&quot;bread&quot; &amp; &quot;butter&quot;</code>.
741 * </p>
742 *
743 * <p>
744 * But:
745 * </p>
746 * <p><code>&quot;bread&quot; &amp; &quot;butter&quot;</code></p>
747 * remains unaffected.
748 *
749 * <p>Supports all known HTML 4.0 entities, including funky accents.
750 * Note that the commonly used apostrophe escape character (&apos;)
751 * is not a legal entity and so is not supported). </p>
752 *
753 * @param input the {@code String} to escape, may be null
754 * @return a new escaped {@code String}, {@code null} if null string input
755 *
756 * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a>
757 * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a>
758 * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a>
759 * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a>
760 * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a>
761 */
762 public static final String escapeHtml4Once(final String input) {
763 return ESCAPE_HTML4_ONCE.translate(input);
764 }
765
766
767 /**
768 * <p>Escapes the characters in a {@code String} using HTML entities.</p>
769 * <p>Supports only the HTML 3.0 entities. </p>
770 *
771 * @param input the {@code String} to escape, may be null
772 * @return a new escaped {@code String}, {@code null} if null string input
773 */
774 public static final String escapeHtml3(final String input) {
775 return ESCAPE_HTML3.translate(input);
776 }
777
778 /**
779 * <p>Escapes the characters in a {@code String} using HTML entities.
780 * But escapes them only once. i.e. does not escape already escaped characters.</p>
781 * <p>Supports only the HTML 3.0 entities. </p>
782 *
783 * @param input the {@code String} to escape, may be null
784 * @return a new escaped {@code String}, {@code null} if null string input
785 */
786 public static final String escapeHtml3Once(final String input) {
787 return ESCAPE_HTML3_ONCE.translate(input);
788 }
789
790 //-----------------------------------------------------------------------
791 /**
792 * <p>Unescapes a string containing entity escapes to a string
793 * containing the actual Unicode characters corresponding to the
794 * escapes. Supports HTML 4.0 entities.</p>
795 *
796 * <p>For example, the string {@code "<Français>"}
797 * will become {@code "<Français>"}</p>
798 *
799 * <p>If an entity is unrecognized, it is left alone, and inserted
800 * verbatim into the result string. e.g. {@code ">&zzzz;x"} will
801 * become {@code ">&zzzz;x"}.</p>
802 *
803 * @param input the {@code String} to unescape, may be null
804 * @return a new unescaped {@code String}, {@code null} if null string input
805 */
806 public static final String unescapeHtml4(final String input) {
807 return UNESCAPE_HTML4.translate(input);
808 }
809
810 /**
811 * <p>Unescapes a string containing entity escapes to a string
812 * containing the actual Unicode characters corresponding to the
813 * escapes. Supports only HTML 3.0 entities.</p>
814 *
815 * @param input the {@code String} to unescape, may be null
816 * @return a new unescaped {@code String}, {@code null} if null string input
817 */
818 public static final String unescapeHtml3(final String input) {
819 return UNESCAPE_HTML3.translate(input);
820 }
821
822 /**
823 * <p>Escapes the characters in a {@code String} using XML entities.</p>
824 *
825 * <p>For example: {@code "bread" & "butter"} =>
826 * {@code "bread" & "butter"}.
827 * </p>
828 *
829 * <p>Note that XML 1.0 is a text-only format: it cannot represent control
830 * characters or unpaired Unicode surrogate codepoints, even after escaping.
831 * {@code escapeXml10} will remove characters that do not fit in the
832 * following ranges:</p>
833 *
834 * <p>{@code #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p>
835 *
836 * <p>Though not strictly necessary, {@code escapeXml10} will escape
837 * characters in the following ranges:</p>
838 *
839 * <p>{@code [#x7F-#x84] | [#x86-#x9F]}</p>
840 *
841 * <p>The returned string can be inserted into a valid XML 1.0 or XML 1.1
842 * document. If you want to allow more non-text characters in an XML 1.1
843 * document, use {@link #escapeXml11(String)}.</p>
844 *
845 * @param input the {@code String} to escape, may be null
846 * @return a new escaped {@code String}, {@code null} if null string input
847 * @see #unescapeXml(java.lang.String)
848 */
849 public static String escapeXml10(final String input) {
850 return ESCAPE_XML10.translate(input);
851 }
852
853 /**
854 * <p>Escapes the characters in a {@code String} using XML entities.</p>
855 *
856 * <p>For example: {@code "bread" & "butter"} =>
857 * {@code "bread" & "butter"}.
858 * </p>
859 *
860 * <p>XML 1.1 can represent certain control characters, but it cannot represent
861 * the null byte or unpaired Unicode surrogate codepoints, even after escaping.
862 * {@code escapeXml11} will remove characters that do not fit in the following
863 * ranges:</p>
864 *
865 * <p>{@code [#x1-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p>
866 *
867 * <p>{@code escapeXml11} will escape characters in the following ranges:</p>
868 *
869 * <p>{@code [#x1-#x8] | [#xB-#xC] | [#xE-#x1F] | [#x7F-#x84] | [#x86-#x9F]}</p>
870 *
871 * <p>The returned string can be inserted into a valid XML 1.1 document. Do not
872 * use it for XML 1.0 documents.</p>
873 *
874 * @param input the {@code String} to escape, may be null
875 * @return a new escaped {@code String}, {@code null} if null string input
876 * @see #unescapeXml(java.lang.String)
877 */
878 public static String escapeXml11(final String input) {
879 return ESCAPE_XML11.translate(input);
880 }
881
882 //-----------------------------------------------------------------------
883 /**
884 * <p>Unescapes a string containing XML entity escapes to a string
885 * containing the actual Unicode characters corresponding to the
886 * escapes.</p>
887 *
888 * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
889 * Does not support DTDs or external entities.</p>
890 *
891 * <p>Note that numerical \\u Unicode codes are unescaped to their respective
892 * Unicode characters. This may change in future releases. </p>
893 *
894 * @param input the {@code String} to unescape, may be null
895 * @return a new unescaped {@code String}, {@code null} if null string input
896 * @see #escapeXml10(String)
897 * @see #escapeXml11(String)
898 */
899 public static final String unescapeXml(final String input) {
900 return UNESCAPE_XML.translate(input);
901 }
902
903 //-----------------------------------------------------------------------
904
905 /**
906 * <p>Returns a {@code String} value for a CSV column enclosed in double quotes,
907 * if required.</p>
908 *
909 * <p>If the value contains a comma, newline or double quote, then the
910 * String value is returned enclosed in double quotes.</p>
911 *
912 * <p>Any double quote characters in the value are escaped with another double quote.</p>
913 *
914 * <p>If the value does not contain a comma, newline or double quote, then the
915 * String value is returned unchanged.</p>
916 *
917 * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
918 * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>.
919 *
920 * @param input the input CSV column String, may be null
921 * @return the input String, enclosed in double quotes if the value contains a comma,
922 * newline or double quote, {@code null} if null string input
923 */
924 public static final String escapeCsv(final String input) {
925 return ESCAPE_CSV.translate(input);
926 }
927
928 /**
929 * <p>Returns a {@code String} value for an unescaped CSV column. </p>
930 *
931 * <p>If the value is enclosed in double quotes, and contains a comma, newline
932 * or double quote, then quotes are removed.
933 * </p>
934 *
935 * <p>Any double quote escaped characters (a pair of double quotes) are unescaped
936 * to just one double quote. </p>
937 *
938 * <p>If the value is not enclosed in double quotes, or is and does not contain a
939 * comma, newline or double quote, then the String value is returned unchanged.</p>
940 *
941 * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
942 * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>.
943 *
944 * @param input the input CSV column String, may be null
945 * @return the input String, with enclosing double quotes removed and embedded double
946 * quotes unescaped, {@code null} if null string input
947 */
948 public static final String unescapeCsv(final String input) {
949 return UNESCAPE_CSV.translate(input);
950 }
951
952 /**
953 * <p>Escapes the characters in a {@code String} using XSI rules.</p>
954 *
955 * <p><b>Beware!</b> In most cases you don't want to escape shell commands but use multi-argument
956 * methods provided by {@link java.lang.ProcessBuilder} or {@link java.lang.Runtime#exec(String[])}
957 * instead.</p>
958 *
959 * <p>Example:</p>
960 * <pre>
961 * input string: He didn't say, "Stop!"
962 * output string: He\ didn\'t\ say,\ \"Stop!\"
963 * </pre>
964 *
965 * @see <a href="http://pubs.opengroup.org/onlinepubs/7908799/xcu/chap2.html">Shell Command Language</a>
966 * @param input String to escape values in, may be null
967 * @return String with escaped values, {@code null} if null string input
968 */
969 public static final String escapeXSI(final String input) {
970 return ESCAPE_XSI.translate(input);
971 }
972
973 /**
974 * <p>Unescapes the characters in a {@code String} using XSI rules.</p>
975 *
976 * @see StringEscapeUtils#escapeXSI(String)
977 * @param input the {@code String} to unescape, may be null
978 * @return a new unescaped {@code String}, {@code null} if null string input
979 */
980 public static final String unescapeXSI(final String input) {
981 return UNESCAPE_XSI.translate(input);
982 }
983
984 }